sampleacc-3003 committed on
Commit
a6f03b3
Β·
verified Β·
1 Parent(s): 3d7c01a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +573 -455
app.py CHANGED
@@ -7,615 +7,733 @@ import requests
7
  import re
8
  import textwrap
9
  import shutil
10
- import time
11
  from datetime import datetime
12
  from PIL import Image, ImageDraw, ImageFont
13
- from functools import lru_cache
14
 
15
  # ========================================
16
  # CONFIGURATION SECTION - CUSTOMIZE HERE
17
  # ========================================
18
 
 
19
# Reddit card text rendering settings.
REDDIT_CONFIG = {
    'template_file': 'reddit_template.png',  # card background image in the script directory
    'font_file': 'RFDewi-Semibold.ttf',      # font used for the hook text
    'font_size_max': 120,                    # largest size tried when fitting text
    'font_size_min': 16,                     # smallest size before giving up
    'text_wrap_width': 50,                   # characters per wrapped line
    'text_color': 'black',
    'line_spacing': 10,                      # pixels between wrapped lines
    'text_box_width_percent': 0.8,           # usable fraction of template width
    'text_box_height_percent': 0.5,          # usable fraction of template height
    'y_offset': 20,                          # vertical nudge from center, in pixels
}

# Word-by-word subtitle settings (values feed the ASS style line).
SUBTITLE_CONFIG = {
    'font_file': 'komiko_axis.ttf',          # font file copied for fontconfig
    'font_name': 'Komika Axis',              # font name libass resolves via fontconfig
    'font_size_default': 18,
    'position_alignment': 5,                 # ASS alignment; 5 = centered (numpad layout)
    'margin_left': 10,
    'margin_right': 10,
    'margin_vertical': 0,
}

# Video assembly / encoding settings.
VIDEO_CONFIG = {
    'reddit_scale_percent': 0.65,            # card width as a fraction of video width
    'fade_start_percent': 0.6,               # fade-out begins at 60% of the audio
    'fade_end_percent': 0.75,                # fully faded to solid colour by 75%
    'promo_percent': 0.1,                    # final 10% reserved for the book cover
    'fade_color_rgb': (218, 207, 195),       # fade colour as RGB tuple
    'fade_color_hex': '#DACFC3',             # same colour for FFmpeg filter strings
    'book_fade_in_duration': 2,              # book cover fade-in, seconds
    # Performance settings
    'encoding_preset': 'faster',  # Options: ultrafast, superfast, veryfast, faster, fast, medium
    'threads': 0,  # 0 = auto-detect
}

# ========================================
# END CONFIGURATION
# ========================================
58
 
 
59
# Put the bundled static ffmpeg/ffprobe binaries on PATH for subprocess calls.
static_ffmpeg.add_paths()
60
 
61
- # Utility Functions
62
def load_font(font_paths, font_size, fallback='Verdana'):
    """Load a TrueType font with fallbacks.

    Tries each path in *font_paths* in order, then the *fallback* system
    font name, and finally PIL's built-in bitmap default (which ignores
    the requested size).

    Args:
        font_paths: iterable of candidate font file paths.
        font_size: point size for the loaded font.
        fallback: system font name to try when no candidate loads.

    Returns:
        A PIL ImageFont instance; never raises.
    """
    for path in font_paths:
        if os.path.exists(path):
            try:
                return ImageFont.truetype(path, font_size)
            except OSError:
                # Corrupt/unreadable font file - try the next candidate
                # (was a bare `except:`, which also hid real bugs).
                continue
    try:
        return ImageFont.truetype(fallback, font_size)
    except OSError:
        return ImageFont.load_default()
74
-
75
def time_to_seconds(time_str):
    """Convert an SRT timestamp ('HH:MM:SS,mmm') to seconds as a float."""
    hours, minutes, rest = time_str.split(':')
    seconds, millis = rest.split(',')
    whole = int(hours) * 3600 + int(minutes) * 60 + int(seconds)
    return whole + int(millis) / 1000.0
80
-
81
def format_elapsed_time(seconds):
    """Format an elapsed duration in seconds as 'M:SS'."""
    mins, secs = divmod(int(seconds), 60)
    return f"{mins}:{secs:02d}"
86
-
87
def run_ffmpeg_cmd(cmd, env, description="", start_time=None):
    """Run an FFmpeg command and time it.

    Returns a (ok, error_message, status_line) triple: on success the
    error is None and the status line carries per-step and total timing;
    on failure the status line is None.
    """
    step_start = time.time()
    try:
        subprocess.run(cmd, check=True, capture_output=True, text=True, env=env)
    except subprocess.CalledProcessError as exc:
        # Keep only the tail of stderr - FFmpeg logs can be enormous.
        detail = exc.stderr[-1000:] if exc.stderr else str(exc)
        return False, f"{description} failed: {detail}", None

    elapsed = time.time() - step_start
    total = time.time() - start_time if start_time else elapsed
    return True, None, f"✅ {description} ({elapsed:.1f}s) | Total: {format_elapsed_time(total)}"
98
-
99
- # Font Setup
100
def setup_custom_fonts_hf(temp_dir):
    """Stage bundled fonts and build a fontconfig environment for FFmpeg/libass.

    Copies fonts from the repo's fonts/ subdirectory (plus the two fonts
    named in REDDIT_CONFIG/SUBTITLE_CONFIG found in the script root) into
    *temp_dir*, writes a minimal fonts.conf pointing at them, and returns
    a copy of os.environ with FONTCONFIG_FILE/FONTCONFIG_PATH set.

    Best-effort: on any failure (or when no fonts are found) it returns a
    plain copy of os.environ so rendering proceeds with system fonts.
    """
    try:
        fonts_dir = os.path.join(temp_dir, 'fonts')
        os.makedirs(fonts_dir, exist_ok=True)

        script_dir = os.path.dirname(os.path.abspath(__file__))
        fonts_to_copy = []

        # Check fonts/ subdirectory
        repo_fonts_dir = os.path.join(script_dir, 'fonts')
        if os.path.exists(repo_fonts_dir):
            fonts_to_copy.extend([
                os.path.join(repo_fonts_dir, f)
                for f in os.listdir(repo_fonts_dir)
                if f.lower().endswith(('.ttf', '.otf'))
            ])

        # Check root directory for the two explicitly configured fonts.
        for font_file in [REDDIT_CONFIG['font_file'], SUBTITLE_CONFIG['font_file']]:
            font_path = os.path.join(script_dir, font_file)
            if os.path.exists(font_path) and font_path not in fonts_to_copy:
                fonts_to_copy.append(font_path)

        # Copy fonts into the temp staging directory fontconfig will scan.
        for src in fonts_to_copy:
            shutil.copy(src, os.path.join(fonts_dir, os.path.basename(src)))

        if fonts_to_copy:
            with open(os.path.join(temp_dir, 'fonts.conf'), 'w') as f:
                f.write(f"""<?xml version="1.0"?>
<fontconfig>
<dir>{fonts_dir}</dir>
<cachedir>{temp_dir}/cache</cachedir>
</fontconfig>""")

            env = os.environ.copy()
            env['FONTCONFIG_FILE'] = os.path.join(temp_dir, 'fonts.conf')
            env['FONTCONFIG_PATH'] = temp_dir
            return env

        return os.environ.copy()
    except Exception:
        # Font setup must never block rendering (was a bare `except:`;
        # narrowed so KeyboardInterrupt/SystemExit still propagate).
        return os.environ.copy()
144
 
145
- # File Handling
146
def download_file_from_url(url, output_dir, filename):
    """Stream a remote file to output_dir/filename and return the saved path.

    Raises requests.HTTPError (via raise_for_status) on a bad response.
    """
    response = requests.get(url, stream=True, timeout=30)
    response.raise_for_status()

    destination = os.path.join(output_dir, filename)
    with open(destination, 'wb') as out:
        # 8 KiB chunks keep memory flat for large media files.
        for chunk in response.iter_content(chunk_size=8192):
            out.write(chunk)
    return destination
 
 
 
 
156
 
157
def download_book_cover(book_id, output_dir):
    """Download a book's front cover from Google Books and return its path.

    Verifies the downloaded bytes decode as an image (Image.verify raises
    if not) so a bogus book_id fails fast instead of breaking FFmpeg later.
    """
    cover_url = (
        "https://books.google.com/books/publisher/content/images/frontcover/"
        f"{book_id}?fife=w720-h1280&source=gbs_api"
    )
    response = requests.get(cover_url, timeout=30)
    response.raise_for_status()

    cover_path = os.path.join(output_dir, 'book_cover.png')
    with open(cover_path, 'wb') as out:
        out.write(response.content)

    Image.open(cover_path).verify()
    return cover_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
 
170
def validate_and_get_file(uploaded_file, url_string, file_type, temp_dir):
    """Resolve one media input that may come from an upload OR a URL.

    Exactly one of the two sources must be provided.

    Args:
        uploaded_file: Gradio upload (file object with .name, or a path), or None.
        url_string: URL text field value (may be empty/whitespace).
        file_type: 'video', 'audio', or 'subtitle' - used in messages and
            to pick a default extension.
        temp_dir: directory to download into when a URL is used.

    Returns:
        (path, error_message) - exactly one of the two is None.
    """
    has_upload = uploaded_file is not None
    has_url = url_string and url_string.strip()

    if not has_upload and not has_url:
        return None, f"❌ Provide {file_type} via upload or URL"
    if has_upload and has_url:
        return None, f"❌ Use only ONE method for {file_type}"

    if has_upload:
        # Gradio may hand us a file object (with .name) or a plain path string.
        return (uploaded_file.name if hasattr(uploaded_file, 'name') else uploaded_file), None

    try:
        url = url_string.strip()
        filename = url.split('/')[-1] or f"{file_type}_file"

        if '.' not in filename:
            ext_map = {'video': '.mp4', 'audio': '.wav', 'subtitle': '.srt'}
            filename += ext_map.get(file_type, '.tmp')

        # Timestamp prefix avoids collisions between repeated downloads.
        # BUG FIX: the derived filename was dropped from the format string,
        # losing the real extension FFmpeg relies on - re-append it.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        return download_file_from_url(url, temp_dir, f"{file_type}_{timestamp}_{filename}"), None
    except Exception as e:
        return None, f"❌ Download error: {str(e)}"
195
 
196
- # Media Info (Cached)
197
@lru_cache(maxsize=32)
def get_video_info(video_path):
    """Return (width, height, fps) for a video file via ffprobe (cached).

    fps is parsed from ffprobe's rational r_frame_rate ('num/den') or a
    plain float string.  Raises subprocess.CalledProcessError on ffprobe
    failure, ValueError on unparseable output.
    """
    result = subprocess.run([
        "ffprobe", "-v", "error", "-select_streams", "v:0",
        "-show_entries", "stream=width,height", "-of", "csv=s=x:p=0", video_path
    ], capture_output=True, text=True, check=True)
    width, height = map(int, result.stdout.strip().split('x'))

    result = subprocess.run([
        "ffprobe", "-v", "error", "-select_streams", "v:0",
        "-show_entries", "stream=r_frame_rate", "-of", "default=noprint_wrappers=1:nokey=1", video_path
    ], capture_output=True, text=True, check=True)
    fps_str = result.stdout.strip()

    # SECURITY/IDIOM FIX: parse 'num/den' explicitly instead of eval() -
    # evaluating external tool output as Python code is unsafe and unneeded.
    if '/' in fps_str:
        num, den = fps_str.split('/')
        fps = float(num) / float(den)
    else:
        fps = float(fps_str)
    return width, height, fps
214
 
215
@lru_cache(maxsize=32)
def get_audio_duration(audio_path):
    """Return the duration of an audio file in seconds (cached ffprobe call)."""
    probe = subprocess.run(
        ["ffprobe", "-v", "error", "-show_entries", "format=duration",
         "-of", "default=noprint_wrappers=1:nokey=1", audio_path],
        capture_output=True, text=True, check=True,
    )
    return float(probe.stdout.strip())
223
-
224
- # Subtitle Processing
225
def extract_first_subtitle(srt_path):
    """Return (text, start_sec, end_sec) of the first entry in an SRT file.

    Falls back to ("No subtitle", 0.0, 3.0) when the file contains no
    usable three-line block (index / timing / text).
    """
    with open(srt_path, 'r', encoding='utf-8') as f:
        blocks = re.split(r'\n\s*\n', f.read().strip())

    if blocks:
        first = blocks[0].strip().split('\n')
        if len(first) >= 3:
            times = first[1].split(' --> ')
            text = ' '.join(first[2:]).strip()
            return text, time_to_seconds(times[0].strip()), time_to_seconds(times[1].strip())

    return "No subtitle", 0.0, 3.0
239
 
240
def srt_time_to_ms(time_str):
    """Convert an SRT timestamp ('HH:MM:SS,mmm') to integer milliseconds."""
    clock, millis = time_str.strip().split(',')
    hours, minutes, seconds = clock.split(':')
    return ((int(hours) * 60 + int(minutes)) * 60 + int(seconds)) * 1000 + int(millis)
 
 
245
 
246
def ms_to_ass_time(ms):
    """Convert integer milliseconds to an ASS timestamp 'H:MM:SS.CC'.

    ASS uses centisecond precision, so the final millisecond digit is
    truncated.
    """
    hours, rem = divmod(ms, 3600000)
    minutes, rem = divmod(rem, 60000)
    seconds, rem = divmod(rem, 1000)
    centis = rem // 10
    return f"{hours}:{minutes:02d}:{seconds:02d}.{centis:02d}"
255
-
256
def create_reddit_card_with_text(template_path, hook_text, output_dir, config=REDDIT_CONFIG):
    """Render *hook_text* onto the Reddit card template and save it as PNG.

    Searches font sizes from font_size_max down to font_size_min (step 2)
    for the largest size whose wrapped text fits the configured text box,
    then draws the text centered (with a vertical offset).

    Returns:
        Path of the rendered 'reddit_card.png' in *output_dir*.
    """
    template = Image.open(template_path).convert('RGBA')
    tw, th = template.size

    text_box_w = int(tw * config['text_box_width_percent'])
    text_box_h = int(th * config['text_box_height_percent'])

    script_dir = os.path.dirname(os.path.abspath(__file__))
    font_paths = [
        os.path.join(script_dir, 'fonts', config['font_file']),
        os.path.join(script_dir, config['font_file'])
    ]

    # Wrapping depends only on the configured width, not the font size -
    # hoisted out of the size-search loop (was recomputed every iteration).
    wrapped = textwrap.fill(hook_text, width=config['text_wrap_width'])
    draw = ImageDraw.Draw(template)

    # ROBUSTNESS FIX: if no size fits, fall back to the *wrapped* text at
    # the minimum size (previously the unwrapped text was drawn at the
    # maximum size, overflowing the card for long hooks).
    best_size = config['font_size_min']
    best_wrapped = wrapped

    for size in range(config['font_size_max'], config['font_size_min'] - 1, -2):
        font = load_font(font_paths, size)
        bbox = draw.multiline_textbbox((0, 0), wrapped, font=font, spacing=config['line_spacing'])
        if bbox[2] <= text_box_w and bbox[3] <= text_box_h:
            best_size = size
            break

    # Draw the text centered on the card.
    font = load_font(font_paths, best_size)
    bbox = draw.multiline_textbbox((0, 0), best_wrapped, font=font, spacing=config['line_spacing'])

    x = (tw - bbox[2]) / 2
    y = (th - bbox[3]) / 2 + config['y_offset']

    draw.multiline_text((x, y), best_wrapped, fill=config['text_color'],
                        font=font, spacing=config['line_spacing'], align='left')

    output_path = os.path.join(output_dir, 'reddit_card.png')
    template.save(output_path, 'PNG')
    return output_path
300
 
301
def create_word_by_word_highlight_ass(srt_path, output_dir, highlight_color='yellow',
                                      font_size=None, skip_first=False, config=SUBTITLE_CONFIG):
    """Convert an SRT file to an ASS file with per-word highlighting.

    Each subtitle line is emitted once per word; the active word is drawn
    with a coloured border box, and timing is split evenly across words.
    Set skip_first=True to drop the first SRT block (shown on the Reddit
    card instead).

    Returns:
        Path of the generated 'word_highlight.ass' in *output_dir*.
    """
    font_size = font_size or config['font_size_default']

    # (highlight box colour, highlighted text colour) in ASS &HAABBGGRR form.
    color_map = {
        'yellow': ('&H0000FFFF', '&H00000000'), 'orange': ('&H0000A5FF', '&H00000000'),
        'green': ('&H0000FF00', '&H00000000'), 'cyan': ('&H00FFFF00', '&H00000000'),
        'pink': ('&H00FF69B4', '&H00000000'), 'red': ('&H000000FF', '&H00FFFFFF'),
        'blue': ('&H00FF0000', '&H00FFFFFF'),
    }
    highlight_bg, highlight_text = color_map.get(highlight_color.lower(), ('&H0000FFFF', '&H00000000'))

    with open(srt_path, 'r', encoding='utf-8') as f:
        srt_content = f.read()

    ass_header = f"""[Script Info]
Title: Word Highlight
ScriptType: v4.00+

[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
Style: Default,{config['font_name']},{font_size},&H00FFFFFF,&H00FFFFFF,&H00000000,&H80000000,0,0,0,0,100,100,0,0,1,2,1,{config['position_alignment']},{config['margin_left']},{config['margin_right']},{config['margin_vertical']},1

[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
"""

    ass_events = []
    srt_blocks = re.split(r'\n\s*\n', srt_content.strip())
    start_idx = 1 if skip_first else 0

    for block in srt_blocks[start_idx:]:
        lines = block.strip().split('\n')
        if len(lines) < 3:
            continue

        times = lines[1].split(' --> ')
        if len(times) != 2:
            continue

        start_ms = srt_time_to_ms(times[0])
        end_ms = srt_time_to_ms(times[1])
        words = ' '.join(lines[2:]).split()

        if not words:
            continue

        # Split the block's duration evenly across its words.
        time_per_word = (end_ms - start_ms) / len(words)

        for i, word in enumerate(words):
            word_start = start_ms + int(i * time_per_word)
            # Last word runs to the block's true end to avoid rounding gaps.
            word_end = end_ms if i == len(words) - 1 else start_ms + int((i + 1) * time_per_word)

            # Style only the active word; {\r} resets to the Default style.
            styled_words = [
                f"{{\\c{highlight_text}\\3c{highlight_bg}\\bord5}}{w}{{\\r}}" if j == i else w
                for j, w in enumerate(words)
            ]

            ass_events.append(
                f"Dialogue: 0,{ms_to_ass_time(word_start)},{ms_to_ass_time(word_end)},Default,,0,0,0,,{' '.join(styled_words)}"
            )

    ass_path = os.path.join(output_dir, 'word_highlight.ass')
    # BUG FIX: write UTF-8 explicitly - the SRT is read as UTF-8, and the
    # platform-default encoding would raise UnicodeEncodeError for
    # non-ASCII subtitles (e.g. on Windows/cp1252 locales).
    with open(ass_path, 'w', encoding='utf-8') as f:
        f.write(ass_header + '\n'.join(ass_events))

    return ass_path
367
 
368
- # Main Processing
369
def stitch_media(video_file, video_url, audio_file, audio_url, subtitle_file, subtitle_url,
                 book_id, enable_highlight, highlight_color, font_size, crf_quality=23):
    """Assemble the final video: looped clip + subtitles (+ Reddit card / book promo).

    Returns (output_path, status_message); on failure output_path is None
    and the message describes the error.  All intermediate files and the
    final output live in a fresh temp directory.
    """
    start_time = time.time()  # wall clock for per-step and total timing
    temp_dir = tempfile.mkdtemp()

    try:
        ffmpeg_env = setup_custom_fonts_hf(temp_dir)

        # Resolve each input (upload XOR URL); bail out on the first error.
        video_path, err = validate_and_get_file(video_file, video_url, 'video', temp_dir)
        if err: return None, err

        audio_path, err = validate_and_get_file(audio_file, audio_url, 'audio', temp_dir)
        if err: return None, err

        subtitle_path, err = validate_and_get_file(subtitle_file, subtitle_url, 'subtitle', temp_dir)
        if err: return None, err

        setup_time = time.time() - start_time

        video_width, video_height, video_fps = get_video_info(video_path)
        audio_duration = get_audio_duration(audio_path)

        status = f"⏱️ Setup: {setup_time:.1f}s\n"
        status += f"📥 {video_width}x{video_height}@{video_fps:.0f}fps | {audio_duration:.1f}s\n\n"

        # Reddit card overlay is enabled only when the template image ships
        # alongside the script.
        script_dir = os.path.dirname(os.path.abspath(__file__))
        reddit_template_path = os.path.join(script_dir, REDDIT_CONFIG['template_file'])
        has_reddit = os.path.exists(reddit_template_path)

        if has_reddit:
            reddit_start = time.time()
            # first_start/first_end gate the overlay's visibility window.
            first_text, first_start, first_end = extract_first_subtitle(subtitle_path)
            reddit_card_path = create_reddit_card_with_text(reddit_template_path, first_text, temp_dir)
            reddit_time = time.time() - reddit_start
            status += f"📱 Reddit card: ✅ ({reddit_time:.1f}s)\n"

        # Word-highlight ASS subtitles; the first block is skipped when it
        # is already shown on the Reddit card.
        sub_start = time.time()
        subtitle_ass = create_word_by_word_highlight_ass(
            subtitle_path, temp_dir, highlight_color, font_size,
            skip_first=has_reddit, config=SUBTITLE_CONFIG
        ) if enable_highlight else subtitle_path
        sub_time = time.time() - sub_start
        status += f"📝 Subtitles: ✅ ({sub_time:.1f}s)\n\n"

        # Escape the path for use inside an FFmpeg filter expression.
        subtitle_escaped = subtitle_ass.replace('\\', '/').replace(':', '\\:')

        timestamp = datetime.now().strftime("%H%M%S")
        output_path = os.path.join(temp_dir, f"final_{timestamp}.mp4")
        has_book = book_id and book_id.strip()

        # Timeline: the main video fades to a solid colour, which holds
        # until the book-cover promo fills the final promo_percent.
        fade_start = audio_duration * VIDEO_CONFIG['fade_start_percent']
        fade_end = audio_duration * VIDEO_CONFIG['fade_end_percent']
        fade_duration = fade_end - fade_start
        promo_duration = audio_duration * VIDEO_CONFIG['promo_percent']
        book_start = audio_duration - promo_duration
        solid_duration = book_start - fade_end

        # Shared x264 settings for every encode step.
        common_encode_flags = [
            "-c:v", "libx264",
            "-preset", VIDEO_CONFIG['encoding_preset'],
            "-crf", str(crf_quality),
            "-pix_fmt", "yuv420p",
            "-threads", str(VIDEO_CONFIG['threads'])
        ]

        if has_book:
            status += "🎬 Encoding with book cover:\n\n"
            book_cover_path = download_book_cover(book_id.strip(), temp_dir)

            segments = []

            # STEP 1: looped main video, fading out to the solid colour.
            main_path = os.path.join(temp_dir, f"main_{timestamp}.mp4")
            success, error, timing = run_ffmpeg_cmd([
                "ffmpeg", "-hwaccel", "auto",
                "-stream_loop", "-1", "-i", video_path, "-t", str(fade_end),
                "-vf", f"fps={video_fps},scale={video_width}:{video_height},fade=t=out:st={fade_start}:d={fade_duration}:c={VIDEO_CONFIG['fade_color_hex']}",
                *common_encode_flags, "-an", "-y", main_path
            ], ffmpeg_env, "Step 1/4: Main video", start_time)
            if not success: return None, error
            status += f"{timing}\n"
            segments.append(main_path)

            # STEP 2: solid colour hold between fade and promo (if any).
            if solid_duration > 0:
                solid_path = os.path.join(temp_dir, f"solid_{timestamp}.mp4")
                success, error, timing = run_ffmpeg_cmd([
                    "ffmpeg", "-f", "lavfi",
                    "-i", f"color=c={VIDEO_CONFIG['fade_color_hex']}:s={video_width}x{video_height}:d={solid_duration}:r={video_fps}",
                    *common_encode_flags, "-y", solid_path
                ], ffmpeg_env, "Step 2/4: Solid color", start_time)
                if not success: return None, error
                status += f"{timing}\n"
                segments.append(solid_path)

            # STEP 3: book cover, letterboxed on the fade colour, fading in.
            cover_path = os.path.join(temp_dir, f"cover_{timestamp}.mp4")
            success, error, timing = run_ffmpeg_cmd([
                "ffmpeg", "-hwaccel", "auto",
                "-loop", "1", "-i", book_cover_path, "-t", str(promo_duration),
                "-vf", f"scale={video_width}:{video_height}:force_original_aspect_ratio=decrease,pad={video_width}:{video_height}:(ow-iw)/2:(oh-ih)/2:color={VIDEO_CONFIG['fade_color_hex']},setsar=1,fps={video_fps},fade=t=in:st=0:d={VIDEO_CONFIG['book_fade_in_duration']}:c={VIDEO_CONFIG['fade_color_hex']}",
                *common_encode_flags, "-an", "-y", cover_path
            ], ffmpeg_env, "Step 3/4: Book cover", start_time)
            if not success: return None, error
            status += f"{timing}\n"
            segments.append(cover_path)

            # STEP 4: concat the segments, burn subtitles, mux the audio.
            concat_list = os.path.join(temp_dir, f"concat_{timestamp}.txt")
            with open(concat_list, 'w') as f:
                f.write('\n'.join(f"file '{s}'" for s in segments))

            if has_reddit:
                filter_complex = (
                    f"[0:v]ass={subtitle_escaped}[bg];"
                    f"[1:v]scale={video_width}*{VIDEO_CONFIG['reddit_scale_percent']}:-1[reddit];"
                    f"[bg][reddit]overlay=(W-w)/2:(H-h)/2:enable='between(t,{first_start},{first_end})'[v]"
                )
                cmd = [
                    "ffmpeg", "-hwaccel", "auto",
                    "-f", "concat", "-safe", "0", "-i", concat_list,
                    "-loop", "1", "-i", reddit_card_path, "-i", audio_path,
                    "-filter_complex", filter_complex, "-map", "[v]", "-map", "2:a",
                    *common_encode_flags, "-c:a", "aac", "-shortest", "-y", output_path
                ]
            else:
                cmd = [
                    "ffmpeg", "-hwaccel", "auto",
                    "-f", "concat", "-safe", "0", "-i", concat_list, "-i", audio_path,
                    "-vf", f"ass={subtitle_escaped}", "-map", "0:v", "-map", "1:a",
                    *common_encode_flags, "-c:a", "aac", "-shortest", "-y", output_path
                ]

            success, error, timing = run_ffmpeg_cmd(cmd, ffmpeg_env, "Step 4/4: Final", start_time)
            if not success: return None, error
            status += f"{timing}\n"

        else:
            # No book promo: single-pass encode of the looped clip + audio.
            status += "🎬 Encoding:\n\n"

            if has_reddit:
                filter_complex = (
                    f"[0:v]ass={subtitle_escaped}[bg];"
                    f"[1:v]scale={video_width}*{VIDEO_CONFIG['reddit_scale_percent']}:-1[reddit];"
                    f"[bg][reddit]overlay=(W-w)/2:(H-h)/2:enable='between(t,{first_start},{first_end})'[v]"
                )
                cmd = [
                    "ffmpeg", "-hwaccel", "auto",
                    "-stream_loop", "-1", "-i", video_path,
                    "-loop", "1", "-i", reddit_card_path, "-i", audio_path,
                    "-filter_complex", filter_complex, "-map", "[v]", "-map", "2:a",
                    *common_encode_flags, "-c:a", "aac", "-shortest", "-y", output_path
                ]
            else:
                cmd = [
                    "ffmpeg", "-hwaccel", "auto",
                    "-stream_loop", "-1", "-i", video_path, "-i", audio_path,
                    "-vf", f"ass={subtitle_escaped}", "-map", "0:v", "-map", "1:a",
                    *common_encode_flags, "-c:a", "aac", "-shortest", "-y", output_path
                ]

            success, error, timing = run_ffmpeg_cmd(cmd, ffmpeg_env, "Video encoding", start_time)
            if not success: return None, error
            status += f"{timing}\n"

        total_time = time.time() - start_time

        if os.path.exists(output_path):
            size_mb = os.path.getsize(output_path) / (1024 * 1024)
            success_msg = f"""✅ VIDEO COMPLETE!

📊 File: {size_mb:.1f}MB | Duration: {audio_duration:.1f}s
⏱️ TOTAL TIME: {format_elapsed_time(total_time)} ({total_time:.1f}s)
⚡ Preset: {VIDEO_CONFIG['encoding_preset']} | Threads: {VIDEO_CONFIG['threads']}

──────────────────────────
{status}"""
            return output_path, success_msg

        return None, "❌ Output not created"

    except Exception as e:
        total_time = time.time() - start_time
        return None, f"❌ Error after {format_elapsed_time(total_time)}: {str(e)}"
564
 
565
  # Gradio UI
566
  with gr.Blocks(title="Video Stitcher", theme=gr.themes.Soft()) as app:
567
- gr.Markdown(f"""
568
- # 🎬 Video Stitcher ⚑ OPTIMIZED
569
-
570
- **Performance:** Hardware accel + {VIDEO_CONFIG['encoding_preset']} preset + multi-threading
571
- **Config:** Reddit={REDDIT_CONFIG['font_file']} | Subtitle={SUBTITLE_CONFIG['font_name']}
572
-
573
- **Expected:** 3-4 minutes (was 6 minutes) - 30-50% faster! πŸš€
574
- """)
 
 
 
 
575
 
576
  with gr.Row():
577
  with gr.Column():
 
578
  with gr.Group():
579
- gr.Markdown("**πŸ“Ή Video**")
580
  video_input = gr.File(label="Upload", file_types=[".mp4", ".mov", ".avi", ".mkv"], type="filepath")
581
- video_url_input = gr.Textbox(label="OR URL", placeholder="https://...")
 
582
 
 
583
  with gr.Group():
584
- gr.Markdown("**🎡 Audio**")
585
- audio_input = gr.File(label="Upload", file_types=[".wav", ".mp3", ".aac"], type="filepath")
586
- audio_url_input = gr.Textbox(label="OR URL", placeholder="https://...")
587
 
 
588
  with gr.Group():
589
- gr.Markdown("**πŸ“ Subtitle**")
590
  subtitle_input = gr.File(label="Upload (.srt)", file_types=[".srt"], type="filepath")
591
- subtitle_url_input = gr.Textbox(label="OR URL", placeholder="https://...")
 
592
 
593
- book_id_input = gr.Textbox(label="πŸ“š Book ID (Optional)", placeholder="wyaEDwAAQBAJ")
 
594
 
595
- with gr.Row():
596
- enable_highlight = gr.Checkbox(label="Highlight", value=True)
597
- highlight_color = gr.Dropdown(choices=['yellow', 'orange', 'green', 'cyan', 'pink', 'red', 'blue'],
598
- value='yellow', label="Color")
599
- with gr.Row():
600
- font_size = gr.Slider(12, 32, 18, step=2, label="Font Size")
601
- crf_input = gr.Slider(18, 28, 23, step=1, label="Quality")
602
 
603
  stitch_btn = gr.Button("🎬 Stitch Video", variant="primary", size="lg")
604
 
605
  with gr.Column():
606
- status_output = gr.Textbox(label="Status", lines=12)
 
607
  video_output = gr.Video(label="Result")
608
 
609
- gr.Markdown("""
610
- ### ⚑ Optimizations Applied:
611
- - βœ… Hardware acceleration (`-hwaccel auto`)
612
- - βœ… Faster encoding preset
613
- - βœ… Multi-threading (auto CPU cores)
614
- - βœ… Cached media info
615
- - βœ… **Real-time execution tracking**
616
-
617
- **Timeline shown for each step + total time!**
618
- """)
 
 
 
619
 
620
  stitch_btn.click(
621
  fn=stitch_media,
@@ -626,4 +744,4 @@ with gr.Blocks(title="Video Stitcher", theme=gr.themes.Soft()) as app:
626
  )
627
 
628
if __name__ == "__main__":
    # show_error surfaces tracebacks in the Gradio UI (useful on HF Spaces).
    app.launch(show_error=True)
 
7
  import re
8
  import textwrap
9
  import shutil
 
10
  from datetime import datetime
11
  from PIL import Image, ImageDraw, ImageFont
 
12
 
13
  # ========================================
14
  # CONFIGURATION SECTION - CUSTOMIZE HERE
15
  # ========================================
16
 
17
+ # Reddit Template Text Settings
18
  REDDIT_CONFIG = {
19
+ 'template_file': 'reddit_template.png', # Template filename in script directory
20
+ 'font_file': 'RFDewi-Bold.ttf', # Font file for Reddit text
21
+ 'font_size_max': 180, # Maximum font size to try
22
+ 'font_size_min': 16, # Minimum font size (if text too long)
23
+ 'text_wrap_width': 35, # Characters per line for wrapping
24
+ 'text_color': 'black', # Text color
25
+ 'line_spacing': 10, # Spacing between lines
26
+ 'text_box_width_percent': 0.85, # 80% of template width
27
+ 'text_box_height_percent': 0.65, # 50% of template height
28
+ 'y_offset': 20, # Vertical offset from center
29
  }
30
 
31
+ # Word-by-Word Subtitle Settings
32
  SUBTITLE_CONFIG = {
33
+ 'font_file': 'komiko_axis.ttf', # Font file for subtitles (TTF or OTF)
34
+ 'font_name': 'Komika Axis', # Font name as it appears in system
35
+ 'font_size_default': 12, # Default subtitle font size
36
+ 'position_alignment': 5, # 5 = center (1-9 numpad layout)
37
+ 'margin_left': 20,
38
+ 'margin_right': 20,
39
  'margin_vertical': 0,
40
  }
41
 
42
+ # Video Processing Settings
43
  VIDEO_CONFIG = {
44
+ 'reddit_scale_percent': 0.75, # Reddit template size (0.75 = 75% of video width)
45
+ 'fade_start_percent': 0.70, # When fade to color starts (60%)
46
+ 'fade_end_percent': 0.83, # When fully faded to color (75%)
47
+ 'promo_percent': 0.1, # Last 10% for book cover
48
+ 'fade_color_rgb': (218, 207, 195), # Fade color RGB
49
+ 'book_fade_in_duration': 2, # Book cover fade-in duration (seconds)
 
 
 
 
50
  }
51
 
52
  # ========================================
53
+ # END CONFIGURATION SECTION
54
  # ========================================
55
 
56
+ # Add static ffmpeg to PATH
57
  static_ffmpeg.add_paths()
58
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  def setup_custom_fonts_hf(temp_dir):
60
+ """
61
+ Setup custom fonts for FFmpeg/libass - Hugging Face Spaces compatible.
62
+
63
+ File Structure Required:
64
+ project/
65
+ β”œβ”€β”€ app.py
66
+ β”œβ”€β”€ fonts/
67
+ β”‚ β”œβ”€β”€ komiko_axis.ttf (or your fonts)
68
+ β”‚ └── (other fonts...)
69
+ └── reddit_template.png
70
+
71
+ Returns: environment dict with FONTCONFIG configured
72
+ """
73
  try:
74
  fonts_dir = os.path.join(temp_dir, 'fonts')
75
  os.makedirs(fonts_dir, exist_ok=True)
76
 
77
+ # Get script directory and check for fonts/ subdirectory
78
  script_dir = os.path.dirname(os.path.abspath(__file__))
79
+ repo_fonts_dir = os.path.join(script_dir, 'fonts')
80
+
81
+ # Also check for fonts in script root (fallback)
82
  fonts_to_copy = []
83
 
84
+ # Check fonts/ subdirectory first
 
85
  if os.path.exists(repo_fonts_dir):
86
+ for font_file in os.listdir(repo_fonts_dir):
87
+ if font_file.endswith(('.ttf', '.otf', '.TTF', '.OTF')):
88
+ fonts_to_copy.append(os.path.join(repo_fonts_dir, font_file))
89
+
90
+ # Check script root directory for fonts
91
+ for item in [REDDIT_CONFIG['font_file'], SUBTITLE_CONFIG['font_file']]:
92
+ font_path = os.path.join(script_dir, item)
 
 
93
  if os.path.exists(font_path) and font_path not in fonts_to_copy:
94
  fonts_to_copy.append(font_path)
95
 
96
+ # Copy all found fonts
97
  for src in fonts_to_copy:
98
+ dst = os.path.join(fonts_dir, os.path.basename(src))
99
+ shutil.copy(src, dst)
100
 
101
  if fonts_to_copy:
102
+ # Create fonts.conf for fontconfig
103
+ fonts_conf = f"""<?xml version="1.0"?>
104
  <fontconfig>
105
  <dir>{fonts_dir}</dir>
106
  <cachedir>{temp_dir}/cache</cachedir>
107
+ </fontconfig>
108
+ """
109
+ conf_path = os.path.join(temp_dir, 'fonts.conf')
110
+ with open(conf_path, 'w') as f:
111
+ f.write(fonts_conf)
112
 
113
+ # Set environment variables
114
  env = os.environ.copy()
115
+ env['FONTCONFIG_FILE'] = conf_path
116
  env['FONTCONFIG_PATH'] = temp_dir
117
  return env
118
 
119
+ # Fallback to normal environment
120
  return os.environ.copy()
121
+
122
+ except Exception as e:
123
  return os.environ.copy()
124
 
 
125
  def download_file_from_url(url, output_dir, filename):
126
+ """Download a file from URL and save it to output directory."""
127
+ try:
128
+ response = requests.get(url, stream=True, timeout=30)
129
+ response.raise_for_status()
130
+
131
+ file_path = os.path.join(output_dir, filename)
132
+ with open(file_path, 'wb') as f:
133
+ for chunk in response.iter_content(chunk_size=8192):
134
+ f.write(chunk)
135
+
136
+ return file_path
137
+ except Exception as e:
138
+ raise Exception(f"Failed to download file from URL: {str(e)}")
139
 
140
  def download_book_cover(book_id, output_dir):
141
+ """Download book cover from Google Books API using Book ID."""
142
+ try:
143
+ image_url = f"https://books.google.com/books/publisher/content/images/frontcover/{book_id}?fife=w400-h600&source=gbs_api"
144
+
145
+ response = requests.get(image_url, timeout=30)
146
+ response.raise_for_status()
147
+
148
+ image_path = os.path.join(output_dir, 'book_cover.png')
149
+ with open(image_path, 'wb') as f:
150
+ f.write(response.content)
151
+
152
+ img = Image.open(image_path)
153
+ img.verify()
154
+
155
+ return image_path
156
+ except Exception as e:
157
+ raise Exception(f"Failed to download book cover: {str(e)}")
158
+
159
+ def get_video_info(video_path):
160
+ """Get video resolution and frame rate using ffprobe."""
161
+ try:
162
+ cmd_res = [
163
+ "ffprobe", "-v", "error", "-select_streams", "v:0",
164
+ "-show_entries", "stream=width,height", "-of", "csv=s=x:p=0", video_path
165
+ ]
166
+ result = subprocess.run(cmd_res, capture_output=True, text=True, check=True)
167
+ width, height = result.stdout.strip().split('x')
168
+
169
+ cmd_fps = [
170
+ "ffprobe", "-v", "error", "-select_streams", "v:0",
171
+ "-show_entries", "stream=r_frame_rate", "-of", "default=noprint_wrappers=1:nokey=1", video_path
172
+ ]
173
+ result = subprocess.run(cmd_fps, capture_output=True, text=True, check=True)
174
+ fps_str = result.stdout.strip()
175
+
176
+ if '/' in fps_str:
177
+ num, den = fps_str.split('/')
178
+ fps = float(num) / float(den)
179
+ else:
180
+ fps = float(fps_str)
181
+
182
+ return int(width), int(height), fps
183
+ except Exception as e:
184
+ raise Exception(f"Failed to get video info: {str(e)}")
185
+
186
+ def get_audio_duration(audio_path):
187
+ """Get audio duration in seconds using ffprobe."""
188
+ try:
189
+ cmd = [
190
+ "ffprobe", "-v", "error", "-show_entries", "format=duration",
191
+ "-of", "default=noprint_wrappers=1:nokey=1", audio_path
192
+ ]
193
+ result = subprocess.run(cmd, capture_output=True, text=True, check=True)
194
+ return float(result.stdout.strip())
195
+ except Exception as e:
196
+ raise Exception(f"Failed to get audio duration: {str(e)}")
197
+
198
def extract_first_subtitle(srt_path):
    """Return (text, start_sec, end_sec) for the first cue in an SRT file.

    Falls back to ("No subtitle found", 0.0, 3.0) when the file contains no
    usable cue; any parse failure is re-raised as a generic Exception.
    """
    try:
        with open(srt_path, 'r', encoding='utf-8') as handle:
            raw = handle.read()

        # Cues are separated by blank lines.
        cues = re.split(r'\n\s*\n', raw.strip())
        if not cues:
            return "No subtitle found", 0.0, 3.0

        lines = cues[0].strip().split('\n')
        if len(lines) < 3:
            return "No subtitle found", 0.0, 3.0

        def _to_seconds(stamp):
            # "HH:MM:SS,mmm" -> float seconds
            hours, minutes, rest = stamp.split(':')
            whole, millis = rest.split(',')
            return int(hours) * 3600 + int(minutes) * 60 + int(whole) + int(millis) / 1000.0

        stamps = lines[1].split(' --> ')
        begin = _to_seconds(stamps[0].strip())
        finish = _to_seconds(stamps[1].strip())
        caption = ' '.join(lines[2:]).strip()

        return caption, begin, finish
    except Exception as e:
        raise Exception(f"Failed to extract first subtitle: {str(e)}")
226
+
227
def create_reddit_card_with_text(template_path, hook_text, output_dir, config=REDDIT_CONFIG):
    """
    Render *hook_text* onto the Reddit card template and save the composite.

    The text is word-wrapped and the font size is auto-fitted: sizes are tried
    from ``font_size_max`` down to ``font_size_min`` (step 2) until the wrapped
    block fits the configured text box.

    Fix vs. previous version: when no size fit, the old code drew the raw,
    UNWRAPPED text at the maximum size; now it falls back to the minimum size
    with the wrapped text.

    Args:
        template_path: Path to the card template image.
        hook_text: Text to render (typically the first subtitle).
        output_dir: Directory for the composite PNG.
        config: Styling dict (defaults to REDDIT_CONFIG).

    Returns:
        Path of the saved 'reddit_card_composite.png'.

    Raises:
        Exception: wrapping any image/font failure.
    """
    def _load_font(paths, size):
        # Bundled font files first, then a system font, then PIL's default.
        for candidate in paths:
            if os.path.exists(candidate):
                try:
                    return ImageFont.truetype(candidate, size)
                except Exception:
                    pass
        try:
            return ImageFont.truetype('Verdana', size)
        except Exception:
            return ImageFont.load_default()

    try:
        template = Image.open(template_path).convert('RGBA')
        template_width, template_height = template.size
        draw = ImageDraw.Draw(template)

        text_box_width = int(template_width * config['text_box_width_percent'])
        text_box_height = int(template_height * config['text_box_height_percent'])

        script_dir = os.path.dirname(os.path.abspath(__file__))
        font_paths = [
            os.path.join(script_dir, 'fonts', config['font_file']),
            os.path.join(script_dir, config['font_file']),
        ]

        # Wrapping does not depend on font size, so compute it once.
        wrapped = textwrap.fill(hook_text, width=config['text_wrap_width'])

        # Fallback: smallest size with wrapped text if nothing fits.
        best_font_size = config['font_size_min']
        best_wrapped_text = wrapped

        for font_size in range(config['font_size_max'], config['font_size_min'] - 1, -2):
            font = _load_font(font_paths, font_size)
            bbox = draw.multiline_textbbox((0, 0), wrapped, font=font, spacing=config['line_spacing'])
            if (bbox[2] - bbox[0]) <= text_box_width and (bbox[3] - bbox[1]) <= text_box_height:
                best_font_size = font_size
                break

        # Measure with the chosen size, then centre (plus configurable nudge).
        font = _load_font(font_paths, best_font_size)
        bbox = draw.multiline_textbbox((0, 0), best_wrapped_text, font=font, spacing=config['line_spacing'])
        text_width = bbox[2] - bbox[0]
        text_height = bbox[3] - bbox[1]

        x = (template_width - text_width) / 2
        y = (template_height - text_height) / 2 + config['y_offset']

        draw.multiline_text(
            (x, y),
            best_wrapped_text,
            fill=config['text_color'],
            font=font,
            spacing=config['line_spacing'],
            align='left'
        )

        output_path = os.path.join(output_dir, 'reddit_card_composite.png')
        template.save(output_path, 'PNG')

        return output_path
    except Exception as e:
        raise Exception(f"Failed to create Reddit card: {str(e)}")
319
 
320
def validate_and_get_file(uploaded_file, url_string, file_type, temp_dir):
    """Resolve a single input source (upload XOR URL) to a local file path.

    Returns (path, None) on success, or (None, error_message) when the user
    supplied neither source, both sources, or the URL download failed.
    """
    upload_given = uploaded_file is not None
    url_given = url_string and url_string.strip()

    if not upload_given and not url_given:
        return None, f"❌ Please provide {file_type} either by upload or URL"

    if upload_given and url_given:
        return None, f"❌ Please use only ONE method for {file_type}: either upload OR URL (not both)"

    if upload_given:
        # Gradio may hand us a file-like object (with .name) or a raw path.
        return getattr(uploaded_file, 'name', uploaded_file), None

    if url_given:
        try:
            # Derive a readable filename from the URL, defaulting the extension.
            segments = url_string.strip().split('/')
            original_filename = segments[-1] if segments else f"{file_type}_file"

            if '.' not in original_filename:
                default_ext = {'video': '.mp4', 'audio': '.wav', 'subtitle': '.srt'}
                original_filename += default_ext.get(file_type, '.tmp')

            stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            local_name = f"{file_type}_{stamp}_{original_filename}"

            return download_file_from_url(url_string.strip(), temp_dir, local_name), None
        except Exception as e:
            return None, f"❌ Error downloading {file_type} from URL: {str(e)}"

    return None, f"❌ Unknown error processing {file_type}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
353
 
354
def srt_time_to_ms(time_str):
    """Convert an SRT timestamp ("HH:MM:SS,mmm") to integer milliseconds."""
    clock, _, millis = time_str.strip().partition(',')
    hours, minutes, seconds = clock.split(':')
    whole_seconds = (int(hours) * 60 + int(minutes)) * 60 + int(seconds)
    return whole_seconds * 1000 + int(millis)
361
 
362
def ms_to_ass_time(ms):
    """Convert milliseconds to the ASS "H:MM:SS.cc" format (centisecond precision)."""
    hours, remainder = divmod(ms, 3600000)
    minutes, remainder = divmod(remainder, 60000)
    seconds, leftover_ms = divmod(remainder, 1000)
    centiseconds = leftover_ms // 10
    return f"{hours}:{minutes:02d}:{seconds:02d}.{centiseconds:02d}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
371
 
372
def create_word_by_word_highlight_ass(srt_path, output_dir, highlight_color='yellow',
                                      font_size=None, skip_first=False, config=None):
    """
    Convert an SRT file to an ASS file with word-by-word highlighting.

    Each SRT cue is expanded into one ASS Dialogue line per word; the active
    word gets a coloured outline while the rest of the cue stays plain.

    Args:
        srt_path: Source .srt file.
        output_dir: Directory for the generated .ass file.
        highlight_color: yellow/orange/green/cyan/pink/red/blue (unknown
            values fall back to yellow).
        font_size: Subtitle font size; defaults to the configured size.
        skip_first: Skip the first cue (shown as the Reddit card instead).
        config: Styling dict. Defaults to SUBTITLE_CONFIG — resolved at call
            time rather than at definition time, so runtime edits to
            SUBTITLE_CONFIG are honoured (previously it was bound once at def).

    Returns:
        Path of the written .ass file.
    """
    if config is None:
        config = SUBTITLE_CONFIG
    if font_size is None:
        font_size = config['font_size_default']

    # (highlight outline colour, highlighted word text colour) in ASS &HAABBGGRR form.
    color_map = {
        'yellow': ('&H0000FFFF', '&H00000000'),
        'orange': ('&H0000A5FF', '&H00000000'),
        'green': ('&H0000FF00', '&H00000000'),
        'cyan': ('&H00FFFF00', '&H00000000'),
        'pink': ('&H00FF69B4', '&H00000000'),
        'red': ('&H000000FF', '&H00FFFFFF'),
        'blue': ('&H00FF0000', '&H00FFFFFF'),
    }
    highlight_bg, highlight_text = color_map.get(highlight_color.lower(), ('&H0000FFFF', '&H00000000'))

    with open(srt_path, 'r', encoding='utf-8') as f:
        srt_content = f.read()

    ass_path = os.path.join(output_dir, 'word_highlight_subtitles.ass')

    ass_header = f"""[Script Info]
Title: Word-by-Word Highlight Subtitles
ScriptType: v4.00+
Collisions: Normal
PlayDepth: 0

[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
Style: Default,{config['font_name']},{font_size},&H00FFFFFF,&H00FFFFFF,&H00000000,&H80000000,0,0,0,0,100,100,0,0,1,2,1,{config['position_alignment']},{config['margin_left']},{config['margin_right']},{config['margin_vertical']},1

[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
"""

    srt_blocks = re.split(r'\n\s*\n', srt_content.strip())
    ass_events = []
    start_index = 1 if skip_first else 0

    for block in srt_blocks[start_index:]:
        lines = block.strip().split('\n')
        if len(lines) < 3:
            continue  # need index, timing, and at least one text line

        times = lines[1].split(' --> ')
        if len(times) != 2:
            continue  # malformed timing line

        start_ms = srt_time_to_ms(times[0])
        end_ms = srt_time_to_ms(times[1])

        words = ' '.join(lines[2:]).split()
        if not words:
            continue

        # Distribute the cue's duration evenly across its words.
        time_per_word = (end_ms - start_ms) / len(words)

        for i, word in enumerate(words):
            word_start_ms = start_ms + int(i * time_per_word)
            # Snap the final word to the cue's end to avoid rounding gaps.
            word_end_ms = end_ms if i == len(words) - 1 else start_ms + int((i + 1) * time_per_word)

            text_parts = []
            for j, w in enumerate(words):
                if j == i:
                    text_parts.append(f"{{\\c{highlight_text}\\3c{highlight_bg}\\bord5}}{w}{{\\r}}")
                else:
                    text_parts.append(w)

            styled_text = ' '.join(text_parts)
            start_time = ms_to_ass_time(word_start_ms)
            end_time = ms_to_ass_time(word_end_ms)

            ass_events.append(f"Dialogue: 0,{start_time},{end_time},Default,,0,0,0,,{styled_text}")

    with open(ass_path, 'w', encoding='utf-8') as f:
        f.write(ass_header)
        f.write('\n'.join(ass_events))

    return ass_path
460
 
461
def stitch_media(
    video_file, video_url,
    audio_file, audio_url,
    subtitle_file, subtitle_url,
    book_id,
    enable_highlight,
    highlight_color,
    font_size,
    crf_quality=23
):
    """Stitch video, audio and subtitles into one clip, with optional extras.

    Pipeline:
      1. Resolve each input (upload XOR URL) to a local file.
      2. Optionally render the first subtitle as a Reddit-card overlay.
      3. Optionally convert subtitles to word-by-word highlighted ASS.
      4. With a Google Books ID: build a 4-step promo (main video + fade-out,
         solid-colour hold, book-cover fade-in, final mux); otherwise just
         loop the background video for the audio length.

    Returns:
        (output_path, status_message) on success, (None, error_message) on failure.
    """
    # NOTE: not removed here - the returned output file lives inside it.
    temp_dir = tempfile.mkdtemp()

    try:
        # Register bundled fonts so ffmpeg/libass can find them.
        ffmpeg_env = setup_custom_fonts_hf(temp_dir)

        # --- Resolve inputs (upload XOR URL) ---
        video_path, video_error = validate_and_get_file(video_file, video_url, 'video', temp_dir)
        if video_error: return None, video_error

        audio_path, audio_error = validate_and_get_file(audio_file, audio_url, 'audio', temp_dir)
        if audio_error: return None, audio_error

        subtitle_path, subtitle_error = validate_and_get_file(subtitle_file, subtitle_url, 'subtitle', temp_dir)
        if subtitle_error: return None, subtitle_error

        # --- Probe media ---
        video_width, video_height, video_fps = get_video_info(video_path)
        audio_duration = get_audio_duration(audio_path)

        status_msg = "📥 Processing files:\n"
        status_msg += f" • Video: {'URL' if video_url else 'Upload'} ({video_width}x{video_height} @ {video_fps:.2f}fps)\n"
        status_msg += f" • Audio: {'URL' if audio_url else 'Upload'} ({audio_duration:.2f}s)\n"
        status_msg += f" • Subtitle: {'URL' if subtitle_url else 'Upload'}\n"

        # --- Reddit card (first subtitle rendered onto the template) ---
        script_dir = os.path.dirname(os.path.abspath(__file__))
        reddit_template_path = os.path.join(script_dir, REDDIT_CONFIG['template_file'])
        has_reddit_template = os.path.exists(reddit_template_path)

        if has_reddit_template:
            status_msg += " • Reddit template: ✅ Found\n"
            try:
                first_sub_text, first_sub_start, first_sub_end = extract_first_subtitle(subtitle_path)
                status_msg += f"\n📱 Reddit Overlay:\n"
                status_msg += f" • Text: '{first_sub_text[:40]}...'\n"
                status_msg += f" • Timing: {first_sub_start:.1f}s - {first_sub_end:.1f}s\n"

                reddit_card_path = create_reddit_card_with_text(
                    reddit_template_path, first_sub_text, temp_dir, REDDIT_CONFIG
                )
                status_msg += " • ✅ Reddit card ready\n"
            except Exception as e:
                # Non-fatal: fall back to plain subtitles if the card fails.
                status_msg += f" • ⚠️ Reddit card failed: {str(e)}\n"
                has_reddit_template = False
        else:
            status_msg += " • Reddit template: ⚠️ Not found (skipping)\n"

        # --- Subtitles: optionally word-by-word highlighted ASS ---
        if enable_highlight:
            status_msg += f"\n✨ Word highlighting: {highlight_color} ({font_size}px)\n"
            # Skip the first cue when it is shown as the Reddit card instead.
            subtitle_to_use = create_word_by_word_highlight_ass(
                subtitle_path, temp_dir, highlight_color, font_size,
                skip_first=has_reddit_template, config=SUBTITLE_CONFIG
            )
        else:
            subtitle_to_use = subtitle_path

        # Escape the path for use inside an ffmpeg filter expression.
        subtitle_escaped = subtitle_to_use.replace('\\', '/').replace(':', '\\:')

        # --- Output target ---
        has_book_cover = book_id and book_id.strip()
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        output_path = os.path.join(temp_dir, f"final_{timestamp}.mp4")

        # Derive the ffmpeg fade colour from the configured RGB triple so that
        # VIDEO_CONFIG stays the single source of truth (it was hard-coded
        # to "#dacfc3" before, silently ignoring the config).
        r, g, b = VIDEO_CONFIG['fade_color_rgb']
        fade_color_hex = f"#{r:02x}{g:02x}{b:02x}"

        if has_book_cover:
            status_msg += f"\n📚 Downloading book cover (ID: {book_id})...\n"
            try:
                book_cover_path = download_book_cover(book_id.strip(), temp_dir)
                status_msg += "✅ Book cover downloaded\n"

                # Timeline (fractions of the audio length):
                #   0 .. fade_start         main video
                #   fade_start .. fade_end  fade-out to solid colour
                #   fade_end .. book_start  solid colour hold
                #   book_start .. end       book cover (fades in)
                fade_starts_at = audio_duration * VIDEO_CONFIG['fade_start_percent']
                fade_ends_at = audio_duration * VIDEO_CONFIG['fade_end_percent']
                fade_out_duration = fade_ends_at - fade_starts_at

                promo_duration = audio_duration * VIDEO_CONFIG['promo_percent']
                book_appears_at = audio_duration - promo_duration
                solid_color_duration = book_appears_at - fade_ends_at

                main_video_duration = fade_ends_at
                cover_segment_duration = promo_duration

                status_msg += f"\n⏱️ Timing: Fade {fade_starts_at:.1f}→{fade_ends_at:.1f}s, Hold {solid_color_duration:.1f}s\n"

                # STEP 1: looped main video, faded out to the solid colour.
                status_msg += "🎬 Step 1/4: Main video with fade-out...\n"
                main_segment_path = os.path.join(temp_dir, f"main_{timestamp}.mp4")
                cmd_main = [
                    "ffmpeg", "-stream_loop", "-1", "-i", video_path, "-t", str(main_video_duration),
                    "-vf", f"fps={video_fps},scale={video_width}:{video_height},fade=t=out:st={fade_starts_at}:d={fade_out_duration}:c={fade_color_hex}",
                    "-c:v", "libx264", "-crf", str(crf_quality), "-pix_fmt", "yuv420p", "-an", "-y", main_segment_path
                ]
                subprocess.run(cmd_main, check=True, capture_output=True, text=True, env=ffmpeg_env)

                # STEP 2: solid-colour hold segment.
                status_msg += "✅ Step 1 done\n🎬 Step 2/4: Solid color...\n"
                solid_color_path = os.path.join(temp_dir, f"solid_{timestamp}.mp4")
                cmd_solid = [
                    "ffmpeg", "-f", "lavfi",
                    "-i", f"color=c={fade_color_hex}:s={video_width}x{video_height}:d={solid_color_duration}:r={video_fps}",
                    "-c:v", "libx264", "-crf", str(crf_quality), "-pix_fmt", "yuv420p", "-y", solid_color_path
                ]
                subprocess.run(cmd_solid, check=True, capture_output=True, text=True, env=ffmpeg_env)

                # STEP 3: book cover, fading in from the solid colour.
                status_msg += "✅ Step 2 done\n🎬 Step 3/4: Cover with fade-in...\n"
                cover_segment_path = os.path.join(temp_dir, f"cover_{timestamp}.mp4")
                cmd_cover = [
                    "ffmpeg", "-loop", "1", "-i", book_cover_path, "-t", str(cover_segment_duration),
                    # Plain scale (no aspect-preserving pad): cover is stretched to frame size.
                    "-vf", f"scale={video_width}:{video_height},setsar=1,fps={video_fps},fade=t=in:st=0:d={VIDEO_CONFIG['book_fade_in_duration']}:c={fade_color_hex}",
                    "-c:v", "libx264", "-crf", str(crf_quality), "-pix_fmt", "yuv420p", "-an", "-y", cover_segment_path
                ]
                subprocess.run(cmd_cover, check=True, capture_output=True, text=True, env=ffmpeg_env)

                # STEP 4: concatenate segments, then add audio/subtitles/Reddit card.
                status_msg += "✅ Step 3 done\n🎬 Step 4/4: Final assembly...\n"
                concat_list_path = os.path.join(temp_dir, f"concat_{timestamp}.txt")
                with open(concat_list_path, 'w') as f:
                    f.write(f"file '{main_segment_path}'\n")
                    f.write(f"file '{solid_color_path}'\n")
                    f.write(f"file '{cover_segment_path}'\n")

                if has_reddit_template:
                    # Burn subtitles, then overlay the (scaled) Reddit card,
                    # centred, only during the first cue's time window.
                    filter_complex = (
                        f"[0:v]ass={subtitle_escaped}[bg];"
                        f"[1:v]scale={video_width}*{VIDEO_CONFIG['reddit_scale_percent']}:-1[reddit];"
                        f"[bg][reddit]overlay=(W-w)/2:(H-h)/2:enable='between(t,{first_sub_start},{first_sub_end})'[v]"
                    )
                    cmd_final = [
                        "ffmpeg", "-f", "concat", "-safe", "0", "-i", concat_list_path,
                        "-loop", "1", "-i", reddit_card_path, "-i", audio_path,
                        "-filter_complex", filter_complex, "-map", "[v]", "-map", "2:a",
                        "-c:v", "libx264", "-crf", str(crf_quality), "-c:a", "aac",
                        "-pix_fmt", "yuv420p", "-shortest", "-y", output_path
                    ]
                else:
                    cmd_final = [
                        "ffmpeg", "-f", "concat", "-safe", "0", "-i", concat_list_path, "-i", audio_path,
                        "-vf", f"ass={subtitle_escaped}", "-map", "0:v", "-map", "1:a",
                        "-c:v", "libx264", "-crf", str(crf_quality), "-c:a", "aac",
                        "-pix_fmt", "yuv420p", "-shortest", "-y", output_path
                    ]

                subprocess.run(cmd_final, check=True, capture_output=True, text=True, env=ffmpeg_env)

            except subprocess.CalledProcessError as e:
                # Show only the tail of ffmpeg's stderr - the useful part.
                return None, f"❌ FFmpeg error:\n{e.stderr[-1000:] if e.stderr else str(e)}"
            except Exception as e:
                return None, f"❌ Error: {str(e)}"

        else:
            # --- No book cover: loop the background video for the audio length ---
            status_msg += "\n🎬 Creating video...\n"

            if has_reddit_template:
                filter_complex = (
                    f"[0:v]ass={subtitle_escaped}[bg];"
                    f"[1:v]scale={video_width}*{VIDEO_CONFIG['reddit_scale_percent']}:-1[reddit];"
                    f"[bg][reddit]overlay=(W-w)/2:(H-h)/2:enable='between(t,{first_sub_start},{first_sub_end})'[v]"
                )
                cmd = [
                    "ffmpeg", "-stream_loop", "-1", "-i", video_path,
                    "-loop", "1", "-i", reddit_card_path, "-i", audio_path,
                    "-filter_complex", filter_complex, "-map", "[v]", "-map", "2:a",
                    "-c:v", "libx264", "-crf", str(crf_quality), "-c:a", "aac",
                    "-shortest", "-y", output_path
                ]
            else:
                cmd = [
                    "ffmpeg", "-stream_loop", "-1", "-i", video_path, "-i", audio_path,
                    "-vf", f"ass={subtitle_escaped}", "-map", "0:v", "-map", "1:a",
                    "-c:v", "libx264", "-crf", str(crf_quality), "-c:a", "aac",
                    "-shortest", "-y", output_path
                ]

            subprocess.run(cmd, check=True, capture_output=True, text=True, env=ffmpeg_env)

        # --- Verify output and build the success report ---
        if os.path.exists(output_path):
            file_size = os.path.getsize(output_path) / (1024 * 1024)
            success_msg = f"✅ Video created successfully!\n\n"
            success_msg += f"📊 Size: {file_size:.2f} MB | Duration: {audio_duration:.2f}s\n"
            success_msg += f"🎨 Quality: CRF {crf_quality} | FPS: {video_fps:.2f}\n"
            if has_reddit_template:
                success_msg += f"📱 Reddit: ✅ ({first_sub_start:.1f}-{first_sub_end:.1f}s)\n"
            if has_book_cover:
                success_msg += f"📚 Book: ✅ (Fade: 60→75%, Hold: 75→90%, Book: 90→100%)\n"
            success_msg += "\n" + status_msg
            return output_path, success_msg
        else:
            return None, "❌ Output file was not created"

    except Exception as e:
        return None, f"❌ Error: {str(e)}"
 
672
 
673
# Gradio UI
with gr.Blocks(title="Video Stitcher", theme=gr.themes.Soft()) as app:
    # Header: surfaces the active CONFIG values so users can see what styling
    # will be applied without reading the source.
    gr.Markdown(
        f"""
# 🎬 Video Stitcher with Reddit Overlay & Book Promo ✨

**Current Configuration:**
- 📱 Reddit text: {REDDIT_CONFIG['font_file']} ({REDDIT_CONFIG['font_size_max']}-{REDDIT_CONFIG['font_size_min']}px)
- 💬 Subtitle: {SUBTITLE_CONFIG['font_name']} ({SUBTITLE_CONFIG['font_size_default']}px)
- 🎨 Fade color: RGB{VIDEO_CONFIG['fade_color_rgb']}

**To customize:** Edit CONFIG dictionaries at top of script
"""
    )

    with gr.Row():
        with gr.Column():
            # Each media source accepts an upload OR a URL (enforced in
            # validate_and_get_file: exactly one of the two must be set).
            gr.Markdown("### 📹 Video")
            with gr.Group():
                video_input = gr.File(label="Upload", file_types=[".mp4", ".mov", ".avi", ".mkv"], type="filepath")
                gr.Markdown("**OR**")
                video_url_input = gr.Textbox(label="URL", placeholder="https://example.com/video.mp4")

            gr.Markdown("### 🎵 Audio")
            with gr.Group():
                audio_input = gr.File(label="Upload", file_types=[".wav", ".mp3", ".aac", ".m4a"], type="filepath")
                gr.Markdown("**OR**")
                audio_url_input = gr.Textbox(label="URL", placeholder="https://example.com/audio.wav")

            gr.Markdown("### 📝 Subtitle")
            with gr.Group():
                subtitle_input = gr.File(label="Upload (.srt)", file_types=[".srt"], type="filepath")
                gr.Markdown("**OR**")
                subtitle_url_input = gr.Textbox(label="URL", placeholder="https://example.com/subtitles.srt")

            # Optional Google Books volume ID; enables the promo ending.
            gr.Markdown("### 📚 Book Cover (Optional)")
            book_id_input = gr.Textbox(label="Google Books ID", placeholder="wyaEDwAAQBAJ")

            gr.Markdown("### ✨ Settings")
            enable_highlight = gr.Checkbox(label="Word Highlighting", value=True)
            highlight_color = gr.Dropdown(choices=['yellow', 'orange', 'green', 'cyan', 'pink', 'red', 'blue'], value='yellow', label="Color")
            font_size = gr.Slider(12, 32, 18, step=2, label="Font Size")
            # x264 CRF: lower = better quality / bigger file.
            crf_input = gr.Slider(18, 28, 23, step=1, label="Quality (CRF)")

            stitch_btn = gr.Button("🎬 Stitch Video", variant="primary", size="lg")

        with gr.Column():
            gr.Markdown("### 📊 Output")
            status_output = gr.Textbox(label="Status", lines=14)
            video_output = gr.Video(label="Result")

    # Deployment notes: where the optional template/font assets are expected.
    gr.Markdown(
        """
### 📁 File Structure:
```
project/
├── app.py
├── fonts/ (optional - for HF deployment)
│   └── komiko_axis.ttf
├── reddit_template.png (optional)
└── komiko_axis.ttf (or in fonts/)
```
"""
    )
737
 
738
  stitch_btn.click(
739
  fn=stitch_media,
 
744
  )
745
 
746
# Entry point: launch the Gradio app; show_error surfaces tracebacks in the UI.
if __name__ == "__main__":
    app.launch(show_error=True)