#!/usr/bin/env python3
"""
app.py - HuggingFace Space
Image + Audio -> 720p MP4 (H.264/AAC when available) + thumbnail extraction
"""

import math
import os
import pathlib
import subprocess
import sys
import time
import uuid

import gradio as gr

# Output dir
OUTPUT_DIR = pathlib.Path("generated")
OUTPUT_DIR.mkdir(exist_ok=True)

# Allowed types
ALLOW_IMG = (".png", ".jpg", ".jpeg")
ALLOW_AUD = (".wav", ".mp3", ".m4a", ".flac", ".aac")


def _parse_encoder_names(listing):
    """Return the set of encoder names found in `ffmpeg -encoders` output.

    Each encoder row is expected to look like ``<flags> <name> <description>``
    where the flags column starts with V (video), A (audio) or S (subtitle).
    Matching on the name column avoids false positives from substrings that
    appear in descriptions or in other encoder names.
    """
    names = set()
    for line in listing.splitlines():
        fields = line.split()
        if len(fields) >= 2 and fields[0][:1] in ("V", "A", "S"):
            names.add(fields[1])
    return names


def detect_codecs():
    """Detect available encoders and choose codecs.

    Returns:
        A ``(video_codec, audio_codec)`` tuple of ffmpeg encoder names.
        Video prefers ``libx264`` and falls back to ``mpeg4``; audio prefers
        ``aac``, then ``libfdk_aac``, then ``libmp3lame``. The two choices
        are made independently so an available encoder is never discarded
        because the other one is missing. If the encoder list cannot be
        read, ``("libx264", "libmp3lame")`` is returned.
    """
    try:
        listing = subprocess.run(
            ["ffmpeg", "-encoders"],
            text=True,
            capture_output=True,
            check=True,
            timeout=5,
        ).stdout
    except (OSError, subprocess.SubprocessError, ValueError) as e:
        print("⚠️ ffmpeg encoders detection failed:", e, file=sys.stderr)
        return ("libx264", "libmp3lame")  # safe fallback

    available = _parse_encoder_names(listing)

    video = "libx264" if "libx264" in available else "mpeg4"
    audio = next(
        (name for name in ("aac", "libfdk_aac", "libmp3lame") if name in available),
        None,
    )

    if audio is None:
        # Nothing we know how to use was listed; keep the historical default.
        audio = "libmp3lame"
        print(f"❌ Falling back to {video} + {audio}")
        return (video, audio)

    if video == "libx264" and audio == "aac":
        print("✅ Selected codecs: libx264 + aac")
        return (video, audio)

    notes = []
    if video != "libx264":
        notes.append("H.264 missing")
    if audio == "libfdk_aac":
        notes.append("native AAC missing")
    elif audio != "aac":
        notes.append("AAC missing")
    print(f"⚠️ Selected codecs: {video} + {audio} ({', '.join(notes)})")
    return (video, audio)


VIDEO_CODEC, AUDIO_CODEC = detect_codecs()


def get_audio_duration_seconds(audio_path):
    """Return rounded-up duration in seconds using ffprobe.

    Args:
        audio_path: Path of the audio file to probe.

    Returns:
        The container duration rounded up to a whole number of seconds, or
        ``None`` when ffprobe cannot be run, exits with an error, or prints
        something that is not a finite number.
    """
    try:
        probe = subprocess.run(
            [
                "ffprobe", "-v", "error",
                "-show_entries", "format=duration",
                "-of", "default=noprint_wrappers=1:nokey=1",
                str(audio_path),
            ],
            text=True,
            capture_output=True,
            check=True,
        )
        return math.ceil(float(probe.stdout.strip()))
    except (OSError, subprocess.SubprocessError, ValueError, OverflowError) as e:
        print("ffprobe failed:", e, file=sys.stderr)
        return None


def run_ffmpeg(cmd):
    """Run ffmpeg command and print stderr progressively.

    Args:
        cmd: Full argument list, starting with the executable name.

    Returns:
        A ``(returncode, stderr_text)`` tuple, where ``stderr_text`` is
        everything the process wrote to stderr.
    """
    print("Running FFmpeg:", " ".join(cmd))
    stderr_accum = []
    # stdout is discarded rather than piped: a pipe that nobody reads can fill
    # up and block the child forever. stdin is detached so the child cannot
    # wait on (or consume) the server's input. The context manager closes the
    # stderr pipe and waits for the process on exit.
    with subprocess.Popen(
        cmd,
        stdin=subprocess.DEVNULL,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.PIPE,
        universal_newlines=True,
    ) as proc:
        for line in proc.stderr:
            stderr_accum.append(line)
            # keep console logs short but helpful
            if "time=" in line or "frame=" in line:
                print(line.strip())
    return proc.returncode, "".join(stderr_accum)


def make_video(image, audio):
    """Generate 720p mp4 and thumbnail.

    Args:
        image: File path of the cover image.
        audio: File path of the audio track.

    Returns:
        A ``(video_path, thumbnail_path)`` tuple of strings.
        ``thumbnail_path`` is ``None`` when no thumbnail file was produced.

    Raises:
        gr.Error: If an input is missing, the audio duration cannot be read,
            or the encode fails.
    """
    if not image or not audio:
        raise gr.Error("Please upload both an image and an audio file.")

    uid = uuid.uuid4().hex
    img_p = pathlib.Path(image)
    aud_p = pathlib.Path(audio)
    out_mp4 = OUTPUT_DIR / f"{uid}.mp4"
    thumb_jpg = OUTPUT_DIR / f"{uid}_thumb.jpg"

    dur = get_audio_duration_seconds(aud_p)
    if not dur:
        raise gr.Error("Unable to read audio duration (ffprobe failed).")
    print(f"Audio duration: {dur}s")

    # Target 720p
    W, H = 1280, 720

    # Build filter string (scale preserving aspect ratio then pad)
    vf = f"scale={W}:{H}:force_original_aspect_ratio=decrease,pad={W}:{H}:(ow-iw)/2:(oh-ih)/2"

    # Common args
    cmd = [
        "ffmpeg", "-y",
        # input image looped
        "-loop", "1", "-framerate", "1", "-i", str(img_p),
        # input audio
        "-i", str(aud_p),
        # video filter
        "-vf", vf,
        # set low fps (static image)
        "-r", "1",
        # ensure colors
        "-pix_fmt", "yuv420p",
        # container optimizations
        "-movflags", "+faststart",
        # stop at shortest (audio)
        "-shortest",
        "-t", str(dur),
    ]

    # Video codec params
    if VIDEO_CODEC == "libx264":
        cmd += ["-c:v", "libx264", "-preset", "medium", "-crf", "22", "-g", "120"]
    else:
        cmd += ["-c:v", "mpeg4", "-b:v", "1200k"]

    # Audio codec params
    if AUDIO_CODEC == "aac":
        # system aac encoder (ffmpeg builtin)
        cmd += ["-c:a", "aac", "-b:a", "192k", "-ar", "48000"]
    elif AUDIO_CODEC == "libmp3lame":
        cmd += ["-c:a", "libmp3lame", "-b:a", "192k", "-ar", "44100"]
    else:
        cmd += ["-c:a", AUDIO_CODEC, "-b:a", "192k", "-ar", "44100"]

    # Output file
    cmd += [str(out_mp4)]

    # Run ffmpeg
    code, stderr = run_ffmpeg(cmd)
    if code != 0:
        print("FFmpeg failed. stderr:")
        print(stderr)
        # Do not leave a truncated file behind in the output directory.
        out_mp4.unlink(missing_ok=True)
        raise gr.Error("FFmpeg encoding failed. See Space logs.")

    # Extract thumbnail at 1 second (or earliest available if audio shorter)
    thumb_time = min(1, max(0, dur - 1))
    thumb_cmd = [
        "ffmpeg", "-y",
        "-i", str(out_mp4),
        "-ss", f"{thumb_time}",
        "-vframes", "1",
        "-q:v", "3",
        str(thumb_jpg),
    ]
    code2, stderr2 = run_ffmpeg(thumb_cmd)
    if code2 != 0:
        print("Thumbnail extraction failed. stderr:")
        print(stderr2)
    # Not fatal — proceed without thumbnail. Only hand back a path that
    # actually exists, whatever the exit code was.
    thumb_path = str(thumb_jpg) if thumb_jpg.exists() else None

    size_mb = out_mp4.stat().st_size / (1024 * 1024)
    print(f"Generated: {out_mp4} ({size_mb:.1f} MB). Thumbnail: {thumb_path}")
    return str(out_mp4), thumb_path


# --- Gradio UI ---
with gr.Blocks(title="Image + Audio → 720p MP4 + Thumbnail") as demo:
    gr.Markdown("Upload cover image + audio. Outputs: downloadable MP4 (720p) and thumbnail JPG (for YouTube).")
    with gr.Row():
        with gr.Column():
            img_in = gr.Image(type="filepath", label="Cover image")
            gr.Markdown("Recommended: 1280x720 or larger (PNG/JPG)")
        with gr.Column():
            aud_in = gr.Audio(type="filepath", label="Audio file")
            gr.Markdown("Supports: MP3, WAV, M4A, FLAC, AAC")
    gen_btn = gr.Button("Generate video")
    vid_out = gr.Video(label="Generated video")
    thumb_out = gr.Image(label="Thumbnail (1s)", type="filepath")
    gen_btn.click(make_video, inputs=[img_in, aud_in], outputs=[vid_out, thumb_out])

demo.queue(max_size=4)
demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", 7860)), share=False)