Video_stitch / app.py
sampleacc-3003's picture
Update app.py
c074e33 verified
import gradio as gr
import subprocess
import static_ffmpeg
import os
import tempfile
import requests
import re
import textwrap
import shutil
from datetime import datetime
from PIL import Image, ImageDraw, ImageFont
# ========================================
# CONFIGURATION SECTION - CUSTOMIZE HERE
# ========================================

# Styling for the text drawn onto the Reddit card template.
REDDIT_CONFIG = {
    # Template image filename, looked up in the script directory.
    "template_file": "reddit_template.png",
    # Font file used for the card text.
    "font_file": "RFDewi-Bold.ttf",
    # Largest font size tried first; sizes are stepped down from here.
    "font_size_max": 180,
    # Smallest font size tried (reached when the text is long).
    "font_size_min": 16,
    # Characters per line passed to the text wrapper.
    "text_wrap_width": 35,
    # Fill colour of the card text.
    "text_color": "black",
    # Pixel spacing between wrapped lines.
    "line_spacing": 10,
    # Fraction of the template width the text may occupy (0.85 = 85%).
    "text_box_width_percent": 0.85,
    # Fraction of the template height the text may occupy (0.65 = 65%).
    "text_box_height_percent": 0.65,
    # Added to the vertically centred text position.
    "y_offset": 20,
}

# Style values written into the generated ASS subtitle file.
SUBTITLE_CONFIG = {
    # Font file for subtitles (TTF or OTF).
    "font_file": "komiko_axis.ttf",
    # Font name as it appears in the system; written into the ASS style.
    "font_name": "Komika Axis",
    # Subtitle font size used when the caller does not supply one.
    "font_size_default": 12,
    # ASS alignment value; 5 = center (1-9 numpad layout).
    "position_alignment": 5,
    # ASS MarginL / MarginR / MarginV values.
    "margin_left": 20,
    "margin_right": 20,
    "margin_vertical": 0,
}

# Timeline and overlay settings for the stitched video.
VIDEO_CONFIG = {
    # Reddit card width as a fraction of the video width (0.75 = 75%).
    "reddit_scale_percent": 0.75,
    # Fraction of the audio duration at which the fade to colour starts (70%).
    "fade_start_percent": 0.70,
    # Fraction of the audio duration at which the fade is complete (83%).
    "fade_end_percent": 0.83,
    # Final fraction of the audio duration reserved for the book cover (10%).
    "promo_percent": 0.1,
    # Fade colour as an (R, G, B) tuple.
    "fade_color_rgb": (218, 207, 195),
    # Book cover fade-in duration, in seconds.
    "book_fade_in_duration": 2,
}

# ========================================
# END CONFIGURATION SECTION
# ========================================
# Add static ffmpeg to PATH.
# Runs at import time; presumably this is what lets the bare "ffmpeg" and
# "ffprobe" command names used by the subprocess calls in this file resolve.
static_ffmpeg.add_paths()
def setup_custom_fonts_hf(temp_dir):
    """
    Prepare a private fontconfig setup so FFmpeg/libass can find bundled fonts.

    File structure expected (Hugging Face Spaces compatible):
        project/
        ├── app.py
        ├── fonts/
        │   ├── komiko_axis.ttf  (or your fonts)
        │   └── (other fonts...)
        └── reddit_template.png

    Font files (.ttf / .otf) from the ``fonts/`` folder next to this script,
    plus the two configured font files when they exist in the script
    directory, are copied into ``<temp_dir>/fonts``. A ``fonts.conf`` that
    points at that folder is then written into ``temp_dir``.

    Returns: a copy of ``os.environ``. FONTCONFIG_FILE and FONTCONFIG_PATH are
    set only when at least one font was found; on any failure an unmodified
    copy of the environment is returned instead.
    """
    try:
        target_dir = os.path.join(temp_dir, 'fonts')
        os.makedirs(target_dir, exist_ok=True)

        here = os.path.dirname(os.path.abspath(__file__))
        bundled_dir = os.path.join(here, 'fonts')

        # Fonts shipped in the fonts/ subdirectory are collected first.
        sources = []
        if os.path.exists(bundled_dir):
            sources = [
                os.path.join(bundled_dir, name)
                for name in os.listdir(bundled_dir)
                if name.endswith(('.ttf', '.otf', '.TTF', '.OTF'))
            ]

        # Fallback: the configured font files sitting next to the script.
        for configured in (REDDIT_CONFIG['font_file'], SUBTITLE_CONFIG['font_file']):
            candidate = os.path.join(here, configured)
            if candidate not in sources and os.path.exists(candidate):
                sources.append(candidate)

        # Nothing to register: hand back the normal environment.
        if not sources:
            return os.environ.copy()

        for source in sources:
            shutil.copy(source, os.path.join(target_dir, os.path.basename(source)))

        # Minimal fontconfig file pointing at the copied fonts.
        conf_text = (
            '<?xml version="1.0"?>\n'
            '<fontconfig>\n'
            f'<dir>{target_dir}</dir>\n'
            f'<cachedir>{temp_dir}/cache</cachedir>\n'
            '</fontconfig>\n'
        )
        conf_path = os.path.join(temp_dir, 'fonts.conf')
        with open(conf_path, 'w') as conf_file:
            conf_file.write(conf_text)

        font_env = os.environ.copy()
        font_env['FONTCONFIG_FILE'] = conf_path
        font_env['FONTCONFIG_PATH'] = temp_dir
        return font_env
    except Exception:
        # Best effort only: font setup problems must not block rendering.
        return os.environ.copy()
def download_file_from_url(url, output_dir, filename):
    """Download ``url`` into ``output_dir/filename`` and return the saved path.

    The response body is streamed to disk in 8 KiB chunks.

    Args:
        url: Address to fetch.
        output_dir: Existing directory the file is written into.
        filename: Name of the file to create inside ``output_dir``.

    Returns:
        The full path of the written file.

    Raises:
        Exception: On any network, HTTP-status or file error; the original
            error is attached as ``__cause__``.
    """
    file_path = os.path.join(output_dir, filename)
    try:
        # The context manager releases the streamed connection even when the
        # status check or the write loop fails part-way through.
        with requests.get(url, stream=True, timeout=30) as response:
            response.raise_for_status()
            with open(file_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
        return file_path
    except Exception as e:
        raise Exception(f"Failed to download file from URL: {str(e)}") from e
def download_book_cover(book_id, output_dir):
    """Download the front cover for a Google Books ID and return its path.

    The image is fetched from the books.google.com front-cover URL, written to
    ``output_dir/book_cover.png`` and then checked with ``Image.verify()`` so
    that a non-image response is rejected.

    NOTE(review): the bytes are stored under a ``.png`` name whatever format
    the server actually returns - confirm downstream tools cope with that.

    Args:
        book_id: Google Books volume identifier.
        output_dir: Existing directory the cover is written into.

    Returns:
        Path of the saved cover image.

    Raises:
        Exception: On any download, write or verification error; the original
            error is attached as ``__cause__``.
    """
    try:
        image_url = f"https://books.google.com/books/publisher/content/images/frontcover/{book_id}"
        response = requests.get(image_url, timeout=30)
        response.raise_for_status()
        image_path = os.path.join(output_dir, 'book_cover.png')
        with open(image_path, 'wb') as f:
            f.write(response.content)
        # Open inside a context manager so the file handle is closed again
        # once the integrity check has run.
        with Image.open(image_path) as img:
            img.verify()
        return image_path
    except Exception as e:
        raise Exception(f"Failed to download book cover: {str(e)}") from e
def get_video_info(video_path):
    """Return ``(width, height, fps)`` of the first video stream via ffprobe.

    Width and height are ints; fps is a float computed from ffprobe's
    ``r_frame_rate`` value, which may be a plain number or a ``num/den`` ratio.

    Raises:
        Exception: If ffprobe fails or its output cannot be parsed.
    """
    def run_ffprobe(entries, output_format):
        # One ffprobe query against the first video stream; returns trimmed stdout.
        completed = subprocess.run(
            [
                "ffprobe", "-v", "error", "-select_streams", "v:0",
                "-show_entries", entries, "-of", output_format, video_path
            ],
            capture_output=True, text=True, check=True
        )
        return completed.stdout.strip()

    try:
        dimensions = run_ffprobe("stream=width,height", "csv=s=x:p=0")
        width, height = dimensions.split('x')

        rate = run_ffprobe("stream=r_frame_rate", "default=noprint_wrappers=1:nokey=1")
        if '/' not in rate:
            fps = float(rate)
        else:
            numerator, denominator = rate.split('/')
            fps = float(numerator) / float(denominator)

        return int(width), int(height), fps
    except Exception as e:
        raise Exception(f"Failed to get video info: {str(e)}")
def get_audio_duration(audio_path):
    """Return the duration of ``audio_path`` in seconds, as reported by ffprobe.

    Raises:
        Exception: If ffprobe fails or does not print a parseable number.
    """
    probe_cmd = [
        "ffprobe", "-v", "error", "-show_entries", "format=duration",
        "-of", "default=noprint_wrappers=1:nokey=1", audio_path
    ]
    try:
        completed = subprocess.run(probe_cmd, capture_output=True, text=True, check=True)
        duration = float(completed.stdout.strip())
    except Exception as e:
        raise Exception(f"Failed to get audio duration: {str(e)}")
    return duration
def extract_first_subtitle(srt_path):
    """Read the first entry of an SRT file.

    Returns: ``(text, start_sec, end_sec)``. Multi-line text is joined with
    spaces. When the first entry has fewer than three lines the placeholder
    ``("No subtitle found", 0.0, 3.0)`` is returned.

    Raises:
        Exception: If the file cannot be read or the timestamps cannot be parsed.
    """
    def to_seconds(stamp):
        # "HH:MM:SS,mmm" -> seconds as a float
        hours, minutes, remainder = stamp.split(':')
        whole, millis = remainder.split(',')
        return int(hours) * 3600 + int(minutes) * 60 + int(whole) + int(millis) / 1000.0

    fallback = ("No subtitle found", 0.0, 3.0)
    try:
        with open(srt_path, 'r', encoding='utf-8') as srt_file:
            raw = srt_file.read()

        # Entries are separated by blank lines.
        entries = re.split(r'\n\s*\n', raw.strip())
        if not entries:
            return fallback

        entry_lines = entries[0].strip().split('\n')
        if len(entry_lines) < 3:
            return fallback

        # Line 0 is the index, line 1 the timing, the rest is the text.
        stamps = entry_lines[1].split(' --> ')
        start_sec = to_seconds(stamps[0].strip())
        end_sec = to_seconds(stamps[1].strip())
        return ' '.join(entry_lines[2:]).strip(), start_sec, end_sec
    except Exception as e:
        raise Exception(f"Failed to extract first subtitle: {str(e)}")
def _load_reddit_font(font_paths, font_size):
    """Return the first loadable font at ``font_size``: a candidate path, then Verdana, then PIL's default."""
    for font_path in font_paths:
        if os.path.exists(font_path):
            try:
                return ImageFont.truetype(font_path, font_size)
            except (OSError, ValueError):
                # Unreadable or invalid font file: try the next candidate.
                continue
    try:
        return ImageFont.truetype('Verdana', font_size)
    except (OSError, ValueError):
        return ImageFont.load_default()


def create_reddit_card_with_text(template_path, hook_text, output_dir, config=REDDIT_CONFIG):
    """
    Create Reddit card with text using PIL.

    The hook text is wrapped to ``config['text_wrap_width']`` characters and
    drawn centred on the template (shifted down by ``config['y_offset']``),
    using the largest font size between ``font_size_max`` and
    ``font_size_min`` whose rendered text fits inside the configured text box.
    If no size fits, the wrapped text is drawn at ``font_size_min``.

    Args:
        template_path: Path of the card template image.
        hook_text: Text to draw on the card.
        output_dir: Directory that receives ``reddit_card_composite.png``.
        config: Styling settings; defaults to REDDIT_CONFIG.

    Returns:
        Path of the saved composite PNG.

    Raises:
        Exception: If the template cannot be opened, drawn on or saved.
    """
    try:
        # Close the source file as soon as the RGBA working copy exists.
        with Image.open(template_path) as source:
            template = source.convert('RGBA')
        template_width, template_height = template.size
        text_box_width = int(template_width * config['text_box_width_percent'])
        text_box_height = int(template_height * config['text_box_height_percent'])

        # Candidate font locations: fonts/ subdirectory first, then script root.
        script_dir = os.path.dirname(os.path.abspath(__file__))
        font_paths = [
            os.path.join(script_dir, 'fonts', config['font_file']),
            os.path.join(script_dir, config['font_file'])
        ]

        # Wrapping does not depend on the font size, so do it once.
        wrapped_text = textwrap.fill(hook_text, width=config['text_wrap_width'])
        draw = ImageDraw.Draw(template)

        # Start from the smallest size so that text which never fits is still
        # drawn wrapped and as small as allowed, rather than unwrapped at the
        # maximum size.
        best_font_size = config['font_size_min']
        for font_size in range(config['font_size_max'], config['font_size_min'] - 1, -2):
            font = _load_reddit_font(font_paths, font_size)
            bbox = draw.multiline_textbbox((0, 0), wrapped_text, font=font, spacing=config['line_spacing'])
            fits_width = bbox[2] - bbox[0] <= text_box_width
            fits_height = bbox[3] - bbox[1] <= text_box_height
            if fits_width and fits_height:
                best_font_size = font_size
                break

        # Measure at the chosen size and centre the text on the template.
        font = _load_reddit_font(font_paths, best_font_size)
        bbox = draw.multiline_textbbox((0, 0), wrapped_text, font=font, spacing=config['line_spacing'])
        text_width = bbox[2] - bbox[0]
        text_height = bbox[3] - bbox[1]
        x = (template_width - text_width) / 2
        y = (template_height - text_height) / 2 + config['y_offset']
        draw.multiline_text(
            (x, y),
            wrapped_text,
            fill=config['text_color'],
            font=font,
            spacing=config['line_spacing'],
            align='left'
        )

        output_path = os.path.join(output_dir, 'reddit_card_composite.png')
        template.save(output_path, 'PNG')
        return output_path
    except Exception as e:
        raise Exception(f"Failed to create Reddit card: {str(e)}") from e
def validate_and_get_file(uploaded_file, url_string, file_type, temp_dir):
    """Validate that only one input method is used and return the file path.

    Args:
        uploaded_file: Uploaded file (a path string or an object with a
            ``name`` attribute), or None when nothing was uploaded.
        url_string: URL text; empty or whitespace-only counts as "no URL".
        file_type: 'video', 'audio' or 'subtitle'; used in messages, in the
            download file name and to pick a default extension.
        temp_dir: Directory that receives a file downloaded from the URL.

    Returns:
        ``(file_path, None)`` on success or ``(None, error_message)`` on failure.
    """
    from urllib.parse import urlparse

    url = url_string.strip() if url_string else ''
    has_upload = uploaded_file is not None
    has_url = bool(url)

    if not has_upload and not has_url:
        return None, f"❌ Please provide {file_type} either by upload or URL"
    if has_upload and has_url:
        return None, f"❌ Please use only ONE method for {file_type}: either upload OR URL (not both)"
    if has_upload:
        file_path = uploaded_file.name if hasattr(uploaded_file, 'name') else uploaded_file
        return file_path, None

    try:
        # Use only the path component so query strings and fragments never
        # end up in the local file name.
        original_filename = os.path.basename(urlparse(url).path)
        # The saved path is later embedded in FFmpeg filter strings, so keep
        # the name to characters that need no escaping there.
        original_filename = re.sub(r'[^A-Za-z0-9._-]', '_', original_filename) or f"{file_type}_file"
        if '.' not in original_filename:
            ext_map = {'video': '.mp4', 'audio': '.wav', 'subtitle': '.srt'}
            original_filename += ext_map.get(file_type, '.tmp')
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"{file_type}_{timestamp}_{original_filename}"
        file_path = download_file_from_url(url, temp_dir, filename)
        return file_path, None
    except Exception as e:
        return None, f"❌ Error downloading {file_type} from URL: {str(e)}"
def srt_time_to_ms(time_str):
    """Convert an SRT timestamp (``HH:MM:SS,mmm``) to integer milliseconds.

    Surrounding whitespace is ignored. A malformed timestamp raises ValueError.
    """
    hh, mm, tail = time_str.strip().split(':')
    ss, millis = tail.split(',')
    # Pair every field with its weight in milliseconds.
    weighted = ((hh, 3600000), (mm, 60000), (ss, 1000), (millis, 1))
    return sum(int(value) * factor for value, factor in weighted)
def ms_to_ass_time(ms):
    """Convert milliseconds to an ASS timestamp (``H:MM:SS.cc``).

    The last field is centiseconds; the millisecond remainder is truncated,
    not rounded.
    """
    hours, remainder = divmod(ms, 3600000)
    minutes, remainder = divmod(remainder, 60000)
    seconds, millis = divmod(remainder, 1000)
    return f"{hours}:{minutes:02d}:{seconds:02d}.{millis // 10:02d}"
def create_word_by_word_highlight_ass(srt_path, output_dir, highlight_color='yellow',
                                      font_size=None, skip_first=False, config=SUBTITLE_CONFIG):
    """
    Convert SRT to ASS with word-by-word highlighting.

    Each subtitle's time span is divided evenly between its words. One
    Dialogue event is written per word; it shows the whole line with that one
    word restyled (text colour, outline colour and a thicker border).

    Args:
        srt_path: Path of the UTF-8 SRT file to convert.
        output_dir: Directory that receives ``word_highlight_subtitles.ass``.
        highlight_color: One of the ``color_map`` keys (case-insensitive);
            unknown names fall back to yellow.
        font_size: Style font size; ``None`` uses ``config['font_size_default']``.
        skip_first: When True the first SRT entry is left out.
        config: Font and layout settings; defaults to SUBTITLE_CONFIG.

    Returns:
        Path of the written ASS file.
    """
    if font_size is None:
        font_size = config['font_size_default']

    # (outline colour, text colour) per highlight name. ASS colours are
    # written &HAABBGGRR, i.e. blue-green-red order - not RGB.
    color_map = {
        'yellow': ('&H0000FFFF', '&H00000000'),
        'orange': ('&H0000A5FF', '&H00000000'),
        'green': ('&H0000FF00', '&H00000000'),
        'cyan': ('&H00FFFF00', '&H00000000'),
        # Hot pink is RGB FF69B4, so its BGR form is B469FF.
        'pink': ('&H00B469FF', '&H00000000'),
        'red': ('&H000000FF', '&H00FFFFFF'),
        'blue': ('&H00FF0000', '&H00FFFFFF'),
    }
    highlight_bg, highlight_text = color_map.get(highlight_color.lower(), color_map['yellow'])

    with open(srt_path, 'r', encoding='utf-8') as f:
        srt_content = f.read()

    ass_path = os.path.join(output_dir, 'word_highlight_subtitles.ass')
    ass_header = f"""[Script Info]
Title: Word-by-Word Highlight Subtitles
ScriptType: v4.00+
Collisions: Normal
PlayDepth: 0
[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
Style: Default,{config['font_name']},{font_size},&H00FFFFFF,&H00FFFFFF,&H00000000,&H80000000,0,0,0,0,100,100,0,0,1,2,1,{config['position_alignment']},{config['margin_left']},{config['margin_right']},{config['margin_vertical']},1
[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
"""

    srt_blocks = re.split(r'\n\s*\n', srt_content.strip())
    ass_events = []
    start_index = 1 if skip_first else 0
    for block in srt_blocks[start_index:]:
        lines = block.strip().split('\n')
        # A usable entry has an index line, a timing line and some text.
        if len(lines) < 3:
            continue
        times = lines[1].split(' --> ')
        if len(times) != 2:
            continue
        words = ' '.join(lines[2:]).split()
        if not words:
            continue

        start_ms = srt_time_to_ms(times[0])
        end_ms = srt_time_to_ms(times[1])
        time_per_word = (end_ms - start_ms) / len(words)
        last_index = len(words) - 1

        for i in range(len(words)):
            word_start_ms = start_ms + int(i * time_per_word)
            # Pin the last word to the entry's end so truncation leaves no gap.
            if i == last_index:
                word_end_ms = end_ms
            else:
                word_end_ms = start_ms + int((i + 1) * time_per_word)

            # Whole line, with only word i wrapped in override tags.
            styled_text = ' '.join(
                f"{{\\c{highlight_text}\\3c{highlight_bg}\\bord5}}{w}{{\\r}}" if j == i else w
                for j, w in enumerate(words)
            )
            start_time = ms_to_ass_time(word_start_ms)
            end_time = ms_to_ass_time(word_end_ms)
            ass_events.append(f"Dialogue: 0,{start_time},{end_time},Default,,0,0,0,,{styled_text}")

    with open(ass_path, 'w', encoding='utf-8') as f:
        f.write(ass_header)
        f.write('\n'.join(ass_events))
    return ass_path
def stitch_media(
    video_file, video_url,
    audio_file, audio_url,
    subtitle_file, subtitle_url,
    book_id,
    enable_highlight,
    highlight_color,
    font_size,
    crf_quality=23
):
    """Main video stitching function with Reddit overlay and book cover.

    The background video is looped under the audio track with subtitles burnt
    in. If the Reddit template image exists next to the script, a card carrying
    the first subtitle's text is overlaid for that subtitle's duration. If a
    book ID is given, the video fades to the configured colour, holds it, and
    ends on the downloaded book cover.

    Args:
        video_file, video_url: Background video as an upload or a URL (exactly one).
        audio_file, audio_url: Audio track as an upload or a URL (exactly one).
        subtitle_file, subtitle_url: SRT subtitles as an upload or a URL (exactly one).
        book_id: Google Books ID for the closing cover; empty disables the promo.
        enable_highlight: When true, subtitles are converted to a word-by-word
            highlighted ASS file; otherwise the subtitle file is burnt in as is.
        highlight_color: Highlight colour name for the ASS conversion.
        font_size: Subtitle font size for the ASS conversion.
        crf_quality: libx264 CRF value used for every encode.

    Returns:
        ``(output_path, status_message)`` on success, ``(None, error_message)``
        on failure.
    """
    temp_dir = tempfile.mkdtemp()
    succeeded = False
    try:
        # Setup custom fonts environment
        ffmpeg_env = setup_custom_fonts_hf(temp_dir)

        # Validate files
        video_path, video_error = validate_and_get_file(video_file, video_url, 'video', temp_dir)
        if video_error:
            return None, video_error
        audio_path, audio_error = validate_and_get_file(audio_file, audio_url, 'audio', temp_dir)
        if audio_error:
            return None, audio_error
        subtitle_path, subtitle_error = validate_and_get_file(subtitle_file, subtitle_url, 'subtitle', temp_dir)
        if subtitle_error:
            return None, subtitle_error

        # Get video info
        video_width, video_height, video_fps = get_video_info(video_path)
        audio_duration = get_audio_duration(audio_path)
        status_msg = "πŸ“₯ Processing files:\n"
        status_msg += f" β€’ Video: {'URL' if video_url else 'Upload'} ({video_width}x{video_height} @ {video_fps:.2f}fps)\n"
        status_msg += f" β€’ Audio: {'URL' if audio_url else 'Upload'} ({audio_duration:.2f}s)\n"
        status_msg += f" β€’ Subtitle: {'URL' if subtitle_url else 'Upload'}\n"

        # Check for Reddit template
        script_dir = os.path.dirname(os.path.abspath(__file__))
        reddit_template_path = os.path.join(script_dir, REDDIT_CONFIG['template_file'])
        has_reddit_template = os.path.exists(reddit_template_path)
        if has_reddit_template:
            status_msg += " β€’ Reddit template: βœ… Found\n"
            try:
                first_sub_text, first_sub_start, first_sub_end = extract_first_subtitle(subtitle_path)
                status_msg += f"\nπŸ“± Reddit Overlay:\n"
                status_msg += f" β€’ Text: '{first_sub_text[:40]}...'\n"
                status_msg += f" β€’ Timing: {first_sub_start:.1f}s - {first_sub_end:.1f}s\n"
                reddit_card_path = create_reddit_card_with_text(
                    reddit_template_path, first_sub_text, temp_dir, REDDIT_CONFIG
                )
                status_msg += " β€’ βœ… Reddit card ready\n"
            except Exception as e:
                # The card is optional: report the problem and carry on without it.
                status_msg += f" β€’ ⚠️ Reddit card failed: {str(e)}\n"
                has_reddit_template = False
        else:
            status_msg += " β€’ Reddit template: ⚠️ Not found (skipping)\n"

        # Process subtitles
        if enable_highlight:
            status_msg += f"\n✨ Word highlighting: {highlight_color} ({font_size}px)\n"
            subtitle_to_use = create_word_by_word_highlight_ass(
                subtitle_path, temp_dir, highlight_color, font_size,
                skip_first=has_reddit_template, config=SUBTITLE_CONFIG
            )
            subtitle_filter = "ass"
        else:
            # The "ass" filter only reads ASS files; a plain subtitle file
            # has to go through the "subtitles" filter instead.
            subtitle_to_use = subtitle_path
            subtitle_filter = "subtitles"
        subtitle_escaped = subtitle_to_use.replace('\\', '/').replace(':', '\\:')
        subtitle_vf = f"{subtitle_filter}={subtitle_escaped}"

        # Check book cover
        has_book_cover = bool(book_id and book_id.strip())
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        output_path = os.path.join(temp_dir, f"final_{timestamp}.mp4")

        # Fade colour as #RRGGBB, derived from the configured RGB tuple.
        r, g, b = VIDEO_CONFIG['fade_color_rgb']
        fade_color_hex = f"#{r:02x}{g:02x}{b:02x}"

        if has_book_cover:
            status_msg += f"\nπŸ“š Downloading book cover (ID: {book_id})...\n"
            book_cover_path = download_book_cover(book_id.strip(), temp_dir)
            status_msg += "βœ… Book cover downloaded\n"

            # Calculate timing from config
            fade_starts_at = audio_duration * VIDEO_CONFIG['fade_start_percent']
            fade_ends_at = audio_duration * VIDEO_CONFIG['fade_end_percent']
            fade_out_duration = fade_ends_at - fade_starts_at
            promo_duration = audio_duration * VIDEO_CONFIG['promo_percent']
            book_appears_at = audio_duration - promo_duration
            solid_color_duration = book_appears_at - fade_ends_at
            status_msg += f"\n⏱️ Timing: Fade {fade_starts_at:.1f}β†’{fade_ends_at:.1f}s, Hold {solid_color_duration:.1f}s\n"

            # STEP 1: Main video with fade-out (runs until the fade is complete)
            status_msg += "🎬 Step 1/4: Main video with fade-out...\n"
            main_segment_path = os.path.join(temp_dir, f"main_{timestamp}.mp4")
            cmd_main = [
                "ffmpeg", "-stream_loop", "-1", "-i", video_path, "-t", str(fade_ends_at),
                "-vf", f"fps={video_fps},scale={video_width}:{video_height},fade=t=out:st={fade_starts_at}:d={fade_out_duration}:c={fade_color_hex}",
                "-c:v", "libx264", "-crf", str(crf_quality), "-pix_fmt", "yuv420p", "-an", "-y", main_segment_path
            ]
            subprocess.run(cmd_main, check=True, capture_output=True, text=True, env=ffmpeg_env)

            # STEP 2: Solid color
            status_msg += "βœ… Step 1 done\n🎬 Step 2/4: Solid color...\n"
            solid_color_path = os.path.join(temp_dir, f"solid_{timestamp}.mp4")
            cmd_solid = [
                "ffmpeg", "-f", "lavfi",
                "-i", f"color=c={fade_color_hex}:s={video_width}x{video_height}:d={solid_color_duration}:r={video_fps}",
                "-c:v", "libx264", "-crf", str(crf_quality), "-pix_fmt", "yuv420p", "-y", solid_color_path
            ]
            subprocess.run(cmd_solid, check=True, capture_output=True, text=True, env=ffmpeg_env)

            # STEP 3: Cover with fade-in (scaled straight to the video size, no padding)
            status_msg += "βœ… Step 2 done\n🎬 Step 3/4: Cover with fade-in...\n"
            cover_segment_path = os.path.join(temp_dir, f"cover_{timestamp}.mp4")
            cmd_cover = [
                "ffmpeg", "-loop", "1", "-i", book_cover_path, "-t", str(promo_duration),
                "-vf", f"scale={video_width}:{video_height},setsar=1,fps={video_fps},fade=t=in:st=0:d={VIDEO_CONFIG['book_fade_in_duration']}:c={fade_color_hex}",
                "-c:v", "libx264", "-crf", str(crf_quality), "-pix_fmt", "yuv420p", "-an", "-y", cover_segment_path
            ]
            subprocess.run(cmd_cover, check=True, capture_output=True, text=True, env=ffmpeg_env)

            # STEP 4: Concat + audio + subtitles + Reddit
            status_msg += "βœ… Step 3 done\n🎬 Step 4/4: Final assembly...\n"
            concat_list_path = os.path.join(temp_dir, f"concat_{timestamp}.txt")
            with open(concat_list_path, 'w') as f:
                f.write(f"file '{main_segment_path}'\n")
                f.write(f"file '{solid_color_path}'\n")
                f.write(f"file '{cover_segment_path}'\n")
            video_input_args = ["-f", "concat", "-safe", "0", "-i", concat_list_path]
            pix_fmt_args = ["-pix_fmt", "yuv420p"]
        else:
            # No book cover - simple loop
            status_msg += "\n🎬 Creating video...\n"
            video_input_args = ["-stream_loop", "-1", "-i", video_path]
            pix_fmt_args = []

        # Final encode: both branches differ only in the video input and in
        # whether the pixel format is forced.
        encode_args = [
            "-c:v", "libx264", "-crf", str(crf_quality), "-c:a", "aac",
            *pix_fmt_args, "-shortest", "-y", output_path
        ]
        if has_reddit_template:
            # Inputs: 0 = video, 1 = looped Reddit card, 2 = audio.
            filter_complex = (
                f"[0:v]{subtitle_vf}[bg];"
                f"[1:v]scale={video_width}*{VIDEO_CONFIG['reddit_scale_percent']}:-1[reddit];"
                f"[bg][reddit]overlay=(W-w)/2:(H-h)/2:enable='between(t,{first_sub_start},{first_sub_end})'[v]"
            )
            cmd_final = [
                "ffmpeg", *video_input_args,
                "-loop", "1", "-i", reddit_card_path, "-i", audio_path,
                "-filter_complex", filter_complex, "-map", "[v]", "-map", "2:a",
                *encode_args
            ]
        else:
            # Inputs: 0 = video, 1 = audio.
            cmd_final = [
                "ffmpeg", *video_input_args, "-i", audio_path,
                "-vf", subtitle_vf, "-map", "0:v", "-map", "1:a",
                *encode_args
            ]
        subprocess.run(cmd_final, check=True, capture_output=True, text=True, env=ffmpeg_env)

        # Check output
        if not os.path.exists(output_path):
            return None, "❌ Output file was not created"

        file_size = os.path.getsize(output_path) / (1024 * 1024)
        success_msg = f"βœ… Video created successfully!\n\n"
        success_msg += f"πŸ“Š Size: {file_size:.2f} MB | Duration: {audio_duration:.2f}s\n"
        success_msg += f"🎨 Quality: CRF {crf_quality} | FPS: {video_fps:.2f}\n"
        if has_reddit_template:
            success_msg += f"πŸ“± Reddit: βœ… ({first_sub_start:.1f}-{first_sub_end:.1f}s)\n"
        if has_book_cover:
            # Report the percentages actually configured, not fixed numbers.
            fade_start_pct = VIDEO_CONFIG['fade_start_percent'] * 100
            fade_end_pct = VIDEO_CONFIG['fade_end_percent'] * 100
            book_start_pct = (1 - VIDEO_CONFIG['promo_percent']) * 100
            success_msg += (
                f"πŸ“š Book: βœ… (Fade: {fade_start_pct:.0f}β†’{fade_end_pct:.0f}%, "
                f"Hold: {fade_end_pct:.0f}β†’{book_start_pct:.0f}%, "
                f"Book: {book_start_pct:.0f}β†’100%)\n"
            )
        success_msg += "\n" + status_msg
        succeeded = True
        return output_path, success_msg
    except subprocess.CalledProcessError as e:
        # Show the tail of FFmpeg's stderr, whichever step failed.
        return None, f"❌ FFmpeg error:\n{e.stderr[-1000:] if e.stderr else str(e)}"
    except Exception as e:
        return None, f"❌ Error: {str(e)}"
    finally:
        # On success the result lives inside temp_dir and must stay; on any
        # failure nothing refers to the directory any more, so remove it.
        if not succeeded:
            shutil.rmtree(temp_dir, ignore_errors=True)
# Gradio UI
# NOTE(review): the source this was taken from had its indentation flattened;
# the nesting below is reconstructed from the statement order - confirm that
# the resulting layout matches the intended one.
with gr.Blocks(title="Video Stitcher", theme=gr.themes.Soft()) as app:
    # Page header; shows a few values from the CONFIG dictionaries.
    gr.Markdown(
        f"""
# 🎬 Video Stitcher with Reddit Overlay & Book Promo ✨
**Current Configuration:**
- πŸ“± Reddit text: {REDDIT_CONFIG['font_file']} ({REDDIT_CONFIG['font_size_max']}-{REDDIT_CONFIG['font_size_min']}px)
- πŸ’¬ Subtitle: {SUBTITLE_CONFIG['font_name']} ({SUBTITLE_CONFIG['font_size_default']}px)
- 🎨 Fade color: RGB{VIDEO_CONFIG['fade_color_rgb']}
**To customize:** Edit CONFIG dictionaries at top of script
"""
    )
    with gr.Row():
        # Input column: each media type can be given as an upload OR a URL.
        with gr.Column():
            gr.Markdown("### πŸ“Ή Video")
            with gr.Group():
                video_input = gr.File(label="Upload", file_types=[".mp4", ".mov", ".avi", ".mkv"], type="filepath")
                gr.Markdown("**OR**")
                video_url_input = gr.Textbox(label="URL", placeholder="https://example.com/video.mp4")
            gr.Markdown("### 🎡 Audio")
            with gr.Group():
                audio_input = gr.File(label="Upload", file_types=[".wav", ".mp3", ".aac", ".m4a"], type="filepath")
                gr.Markdown("**OR**")
                audio_url_input = gr.Textbox(label="URL", placeholder="https://example.com/audio.wav")
            gr.Markdown("### πŸ“ Subtitle")
            with gr.Group():
                subtitle_input = gr.File(label="Upload (.srt)", file_types=[".srt"], type="filepath")
                gr.Markdown("**OR**")
                subtitle_url_input = gr.Textbox(label="URL", placeholder="https://example.com/subtitles.srt")
            gr.Markdown("### πŸ“š Book Cover (Optional)")
            book_id_input = gr.Textbox(label="Google Books ID", placeholder="wyaEDwAAQBAJ")
            gr.Markdown("### ✨ Settings")
            enable_highlight = gr.Checkbox(label="Word Highlighting", value=True)
            # Choices are the colour names known to create_word_by_word_highlight_ass.
            highlight_color = gr.Dropdown(choices=['yellow', 'orange', 'green', 'cyan', 'pink', 'red', 'blue'], value='yellow', label="Color")
            font_size = gr.Slider(12, 32, 12, step=2, label="Font Size")
            crf_input = gr.Slider(18, 28, 23, step=1, label="Quality (CRF)")
            stitch_btn = gr.Button("🎬 Stitch Video", variant="primary", size="lg")
        # Output column: status text and the rendered video.
        with gr.Column():
            gr.Markdown("### πŸ“Š Output")
            status_output = gr.Textbox(label="Status", lines=14)
            video_output = gr.Video(label="Result")
    gr.Markdown(
        """
### πŸ“ File Structure:
```
project/
β”œβ”€β”€ app.py
β”œβ”€β”€ fonts/ (optional - for HF deployment)
β”‚ └── komiko_axis.ttf
β”œβ”€β”€ reddit_template.png (optional)
└── komiko_axis.ttf (or in fonts/)
```
"""
    )
    # The inputs are listed in the same order as stitch_media's parameters;
    # the two outputs receive its (video path, status message) return value.
    stitch_btn.click(
        fn=stitch_media,
        inputs=[video_input, video_url_input, audio_input, audio_url_input,
                subtitle_input, subtitle_url_input, book_id_input,
                enable_highlight, highlight_color, font_size, crf_input],
        outputs=[video_output, status_output]
    )
# Start the Gradio server only when the file is run as a script.
if __name__ == "__main__":
    app.launch(show_error=True)