File size: 3,269 Bytes
d374f60
 
 
 
 
 
 
 
 
 
 
fbc5276
 
d374f60
fbc5276
 
 
d374f60
fbc5276
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d374f60
 
 
 
 
 
 
 
 
 
 
fbc5276
 
 
 
d374f60
 
fbc5276
 
 
 
 
 
 
 
 
d374f60
fbc5276
 
d374f60
fbc5276
 
 
 
 
 
 
d374f60
fbc5276
 
d374f60
fbc5276
d374f60
fbc5276
 
 
 
 
 
d374f60
fbc5276
d374f60
fbc5276
 
 
 
 
 
d374f60
fbc5276
d374f60
fbc5276
 
 
 
 
 
 
 
d374f60
fbc5276
 
d374f60
fbc5276
d374f60
fbc5276
d374f60
 
 
fbc5276
d374f60
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import gradio as gr
import torch
from diffusers import StableVideoDiffusionPipeline
from PIL import Image
import imageio
import uuid
import numpy as np
import cv2

device = "cuda" if torch.cuda.is_available() else "cpu"

pipe = None
current_model = None

# 🔄 Load model only when needed (fixes slow startup)
def load_model(model_name):
    """Lazily load the requested Stable Video Diffusion pipeline.

    Caches the pipeline in the module-level ``pipe`` so repeated calls with
    the same ``model_name`` reuse the loaded model (keeps startup fast).

    Parameters:
        model_name: "Fast (SVD)" for the base img2vid model; anything else
            selects the higher-quality XT variant.

    Returns:
        The loaded pipeline, or ``None`` if loading failed.
    """
    global pipe, current_model

    # Cache hit: requested model is already loaded and usable.
    if current_model == model_name and pipe is not None:
        return pipe

    try:
        if model_name == "Fast (SVD)":
            model_id = "stabilityai/stable-video-diffusion-img2vid"
        else:
            model_id = "stabilityai/stable-video-diffusion-img2vid-xt"

        # Build into a local first so a mid-load failure cannot clobber a
        # previously working global pipeline.
        new_pipe = StableVideoDiffusionPipeline.from_pretrained(
            model_id,
            torch_dtype=torch.float16 if device == "cuda" else torch.float32
        )

        if device == "cuda":
            # enable_model_cpu_offload() manages device placement itself;
            # combining it with a prior .to("cuda") is explicitly discouraged
            # by the diffusers docs, so do one or the other — not both.
            new_pipe.enable_attention_slicing()
            new_pipe.enable_model_cpu_offload()
        else:
            new_pipe = new_pipe.to(device)

        # Commit the globals only after everything succeeded.
        pipe = new_pipe
        current_model = model_name
        return pipe

    except Exception as e:
        # Best-effort loader: report and signal failure to the caller.
        print("Model load error:", e)
        return None


# 🎥 Extract frame from video
def extract_frame(video_path):
    """Return the first frame of *video_path* as an RGB PIL image, or None."""
    capture = cv2.VideoCapture(video_path)
    try:
        ok, bgr_frame = capture.read()
    finally:
        # Always free the capture handle, even if read() raises.
        capture.release()

    if not ok:
        return None

    # OpenCV decodes as BGR; convert to RGB before handing to PIL.
    rgb_frame = cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2RGB)
    return Image.fromarray(rgb_frame)


def generate_video(image, video, fps, motion_strength, model_choice):
    """Generate a short MP4 from an image (or the first frame of a video).

    Parameters:
        image: optional PIL image (takes priority over *video*).
        video: optional video file path; its first frame is used.
        fps: output frame rate for the encoded MP4.
        motion_strength: mapped to the pipeline's motion_bucket_id.
        model_choice: model name passed through to load_model().

    Returns:
        The generated MP4 filename, or ``None`` on any failure
        (missing input, model load error, generation error).
    """
    try:
        pipe = load_model(model_choice)
        if pipe is None:
            return None

        # Select the conditioning input; a direct image wins over a video.
        if image is not None:
            input_image = image.convert("RGB")
        elif video is not None:
            input_image = extract_frame(video)
            if input_image is None:
                return None
        else:
            return None

        # Resize to a small fixed size (⚡ HUGE speed boost).
        input_image = input_image.resize((512, 512))

        # Generate frames (reduced count for speed).
        output = pipe(
            input_image,
            num_frames=16,  # ⚡ faster
            decode_chunk_size=4,
            motion_bucket_id=int(motion_strength)
        )

        # The SVD pipeline returns PIL images by default (output_type="pil");
        # float arrays in [0, 1] appear only with output_type="np".  The old
        # code assumed float arrays and crashed on PIL frames — normalize
        # both cases to HxWx3 uint8 for the encoder.
        frames = []
        for frame in output.frames[0]:
            arr = np.asarray(frame)
            if arr.dtype != np.uint8:
                arr = (np.clip(arr, 0.0, 1.0) * 255).astype(np.uint8)
            frames.append(arr)

        # Unique name so concurrent/repeated generations never collide.
        filename = f"video_{uuid.uuid4().hex}.mp4"

        imageio.mimsave(
            filename,
            frames,
            fps=fps,
            codec="libx264"
        )

        return filename

    except Exception as e:
        # Gradio handler: report and return None so the UI shows no video.
        print("Generation error:", e)
        return None


# 🎨 UI — declarative Gradio layout; component creation order defines the
# on-screen order, so these statements must not be rearranged.
with gr.Blocks() as demo:
    gr.Markdown("# 🎬 StuffMotion AI (FAST + MODEL SELECT)")

    # Two alternative inputs; generate_video prefers the image if both are set.
    image_input = gr.Image(type="pil", label="🖼️ Image Input")
    video_input = gr.Video(label="🎥 Video Input")

    # Model names here must match the strings load_model() checks against.
    model_choice = gr.Dropdown(
        ["Fast (SVD)", "High Quality (XT)"],
        value="Fast (SVD)",
        label="🧠 Model"
    )

    fps = gr.Slider(8, 24, value=12, step=1, label="FPS")
    # 1–255 range matches the pipeline's motion_bucket_id domain.
    motion = gr.Slider(1, 255, value=100, label="Motion")

    generate_btn = gr.Button("⚡ Generate")

    video_output = gr.Video()

    # Input order here must match generate_video's parameter order.
    generate_btn.click(
        fn=generate_video,
        inputs=[image_input, video_input, fps, motion, model_choice],
        outputs=video_output
    )

demo.launch()