import os
import subprocess
import tempfile

import gradio as gr
import torch
from diffusers import StableVideoDiffusionPipeline
from PIL import Image

# No explicit token is configured; per the original author's note, the Space
# token is picked up automatically.
HF_TOKEN = None

# Pick the execution device once and derive the precision from it:
# half precision on GPU, full precision on CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Build the pipeline a single time at import so every request reuses it,
# then move it onto the selected device.
pipe = StableVideoDiffusionPipeline.from_pretrained(
    "stabilityai/stable-video-diffusion-img2vid",
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    use_safetensors=True,
    token=HF_TOKEN,
).to(device)


def generate_video(inp_img, num_frames):
    """Animate a still image with the SVD pipeline and encode it as an MP4.

    Args:
        inp_img: PIL image supplied by the Gradio image input, or ``None``
            when nothing was uploaded.
        num_frames: Number of frames to generate. Coerced to ``int`` because
            a UI slider may deliver the value as a float.

    Returns:
        Path of the encoded video file (``"output.mp4"``).

    Raises:
        gr.Error: If no image was uploaded.
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    if inp_img is None:
        # The click handler has a single Video output, so returning a
        # (message, None) tuple would be misread as video data. Surface the
        # problem as a UI error instead.
        raise gr.Error("No image uploaded!")

    # Normalise to RGB and scale to 576x320 before inference.
    # NOTE(review): the pipeline may rescale to its own default
    # height/width internally -- confirm whether explicit height/width
    # arguments are wanted here.
    img = inp_img.convert("RGB").resize((576, 320))

    # Generate frames (first and only video of the batch).
    frames = pipe(img, num_frames=int(num_frames)).frames[0]

    # Output video filename
    out_path = "output.mp4"

    # Use a fresh scratch directory per call: a shared "frames/" folder would
    # keep higher-numbered PNGs from an earlier, longer run, and ffmpeg's
    # %03d pattern would then append those stale frames to this video.
    with tempfile.TemporaryDirectory(prefix="frames_") as frame_dir:
        for i, f in enumerate(frames):
            f.save(os.path.join(frame_dir, f"frame_{i:03d}.png"))

        # Argument list (no shell) plus check=True so that an encoding
        # failure raises instead of silently returning a bad path.
        subprocess.run(
            [
                "ffmpeg",
                "-y",
                "-framerate", "10",
                "-i", os.path.join(frame_dir, "frame_%03d.png"),
                # PNG input can otherwise be encoded as 4:4:4, which many
                # browser players cannot display.
                "-pix_fmt", "yuv420p",
                out_path,
            ],
            check=True,
        )

    return out_path


# Gradio UI: an image upload and a frame-count slider side by side, a trigger
# button, and a video player that shows the result.
with gr.Blocks() as demo:
    gr.Markdown("# 🐱 AI Image → Video Generator (SVD)")
    gr.Markdown("Upload an image and generate a short AI video using **Stable Video Diffusion img2vid**.")

    with gr.Row():
        image_input = gr.Image(type="pil", label="Upload an input image")
        frame_count = gr.Slider(
            minimum=4,
            maximum=24,
            value=8,
            step=1,
            label="Number of Frames",
        )

    generate_btn = gr.Button("Generate Video")

    video_output = gr.Video(label="Generated Video")

    # Wire the button to the generator: (image, frame count) -> video.
    generate_btn.click(
        fn=generate_video,
        inputs=[image_input, frame_count],
        outputs=video_output,
    )


# Start the web server for the app.
demo.launch()