"""Gradio demo that animates a still image with Stable Video Diffusion (img2vid).

The pipeline is loaded once at import time; each button click runs one
generation and encodes the resulting frames to an MP4 with the ``ffmpeg``
command-line tool, which must be available on ``PATH``.
"""

import os
import subprocess
import tempfile
from typing import Optional

import torch
from diffusers import StableVideoDiffusionPipeline
from PIL import Image
import gradio as gr

# None is passed straight to from_pretrained(token=...).
# Original note: "Uses your Space token automatically".
HF_TOKEN = None

MODEL_ID = "stabilityai/stable-video-diffusion-img2vid"
# (width, height) the uploaded image is resized to before generation.
# NOTE(review): the pipeline may rescale again to its own default
# height/width -- confirm against the diffusers documentation.
INPUT_SIZE = (576, 320)
# Playback rate of the encoded MP4, in frames per second.
OUTPUT_FPS = 10

# Load pipeline once at startup: half precision on GPU, full precision on CPU.
pipe = StableVideoDiffusionPipeline.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    use_safetensors=True,
    token=HF_TOKEN,
)
device = "cuda" if torch.cuda.is_available() else "cpu"
pipe = pipe.to(device)


def _encode_frames(frames, out_path: str, fps: int = OUTPUT_FPS) -> None:
    """Encode a sequence of PIL frames to an MP4 at *out_path* using ffmpeg.

    Frames are written as PNGs into a private scratch directory that is
    removed afterwards, so leftovers from an earlier (longer) run can never be
    picked up by ffmpeg's ``frame_%03d.png`` pattern.

    Raises:
        FileNotFoundError: if the ``ffmpeg`` executable cannot be found.
        subprocess.CalledProcessError: if ffmpeg exits with a non-zero status.
    """
    with tempfile.TemporaryDirectory(prefix="svd_frames_") as frame_dir:
        for index, frame in enumerate(frames):
            frame.save(os.path.join(frame_dir, f"frame_{index:03d}.png"))

        command = [
            "ffmpeg",
            "-y",
            "-framerate", str(fps),
            "-i", os.path.join(frame_dir, "frame_%03d.png"),
            # PNG input otherwise yields a 4:4:4 H.264 stream that many
            # browsers refuse to play; 4:2:0 is the widely supported format.
            "-pix_fmt", "yuv420p",
            out_path,
        ]
        # Argument list (no shell) and check=True so a failed encode is
        # reported instead of silently returning a missing/stale file.
        subprocess.run(command, check=True, capture_output=True)


def generate_video(inp_img: Optional[Image.Image], num_frames) -> str:
    """Generate a short video from *inp_img* and return the MP4 file path.

    Args:
        inp_img: The uploaded image as a PIL image, or ``None`` when the user
            has not uploaded anything.
        num_frames: Number of frames to generate (the slider value).

    Returns:
        Path of the encoded MP4, created in the system temporary directory so
        that concurrent requests do not overwrite each other's output.

    Raises:
        gr.Error: if no image was uploaded or if encoding with ffmpeg fails.
    """
    if inp_img is None:
        # The handler feeds a single Video output, so report the problem
        # through Gradio's error mechanism rather than returning a message.
        raise gr.Error("No image uploaded!")

    # Resize image to the size used for generation.
    img = inp_img.convert("RGB").resize(INPUT_SIZE)

    # The slider may deliver a float; the pipeline expects an integer count.
    frames = pipe(img, num_frames=int(num_frames)).frames[0]

    # Unique output file per request; ffmpeg overwrites the empty placeholder.
    fd, out_path = tempfile.mkstemp(prefix="svd_", suffix=".mp4")
    os.close(fd)

    try:
        _encode_frames(frames, out_path)
    except FileNotFoundError as err:
        os.remove(out_path)
        raise gr.Error("ffmpeg executable not found; cannot build the video.") from err
    except subprocess.CalledProcessError as err:
        os.remove(out_path)
        details = (err.stderr or b"").decode("utf-8", errors="replace")[-500:]
        raise gr.Error(f"ffmpeg failed to build the video: {details}") from err

    return out_path


# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# 🐱 AI Image → Video Generator (SVD)")
    gr.Markdown("Upload an image and generate a short AI video using **Stable Video Diffusion img2vid**.")

    with gr.Row():
        inp_img = gr.Image(type="pil", label="Upload an input image")
        num_frames = gr.Slider(4, 24, value=8, step=1, label="Number of Frames")

    btn = gr.Button("Generate Video")
    out_vid = gr.Video(label="Generated Video")

    btn.click(generate_video, inputs=[inp_img, num_frames], outputs=out_vid)

demo.launch()