# video-generator / app.py
# efecelik's picture
# Remove gr.Examples to fix runtime error
# 6b6ca52
import spaces
import torch
import gradio as gr
import numpy as np
import random
from PIL import Image
from diffusers import CogVideoXImageToVideoPipeline
from diffusers.utils import export_to_video
import tempfile
import os
# Model configuration
MODEL_ID = "THUDM/CogVideoX-5b-I2V"  # image-to-video checkpoint on the HF Hub
MAX_SEED = np.iinfo(np.int32).max  # upper bound when drawing a random seed
# Load pipeline globally (on CPU first, moved to GPU when needed)
print("Loading CogVideoX pipeline...")
pipe = CogVideoXImageToVideoPipeline.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,  # half-precision weights to cut memory roughly in half vs fp32
)
# Keep submodules on CPU and move them to GPU only while they run,
# so the 5B model fits in limited GPU memory.
pipe.enable_model_cpu_offload()
# Slice/tile VAE decoding to lower peak memory for long or large frames.
pipe.vae.enable_slicing()
pipe.vae.enable_tiling()
print("Pipeline loaded!")
def resize_image(image: Image.Image, max_size: int = 720) -> Image.Image:
    """Resize *image* so its longest side is at most *max_size* pixels.

    Aspect ratio is preserved and both output dimensions are rounded down
    to a multiple of 16 (clamped to at least 16), which the CogVideoX
    pipeline expects.  The previous version only snapped dimensions to a
    multiple of 16 when the image was larger than *max_size*, so small
    images with odd sizes slipped through unchanged; now every image is
    normalized.

    Args:
        image: Source PIL image.
        max_size: Upper bound for the longest side, in pixels.

    Returns:
        The resized image, or the original object when no change is needed.
    """
    width, height = image.size
    # Downscale only; never upscale images that already fit.
    if max(width, height) > max_size:
        if width > height:
            new_width = max_size
            new_height = int(height * max_size / width)
        else:
            new_height = max_size
            new_width = int(width * max_size / height)
    else:
        new_width, new_height = width, height
    # Snap both dimensions to a multiple of 16; max(..., 16) guards against
    # a zero dimension for extreme aspect ratios (e.g. a 720x4 strip).
    new_width = max((new_width // 16) * 16, 16)
    new_height = max((new_height // 16) * 16, 16)
    if (new_width, new_height) != (width, height):
        image = image.resize((new_width, new_height), Image.LANCZOS)
    return image
@spaces.GPU(duration=300)
def generate_video(
    image: Image.Image,
    prompt: str,
    negative_prompt: str = "",
    num_frames: int = 49,
    guidance_scale: float = 6.0,
    num_inference_steps: int = 50,
    seed: int = -1,
):
    """Generate a short video from a still image and a motion prompt.

    Args:
        image: Source image; required (raises gr.Error when missing).
        prompt: Motion description; blank/whitespace falls back to a default.
        negative_prompt: Concepts to steer away from.
        num_frames: Number of output frames.
        guidance_scale: Classifier-free guidance strength.
        num_inference_steps: Denoising steps per frame batch.
        seed: RNG seed; -1 picks a random seed in [0, MAX_SEED].

    Returns:
        Tuple of (path to the exported .mp4, seed actually used).

    Raises:
        gr.Error: If no image was uploaded.
    """
    if image is None:
        raise gr.Error("Please upload an image!")
    if not prompt or not prompt.strip():
        prompt = "Make this image come alive with smooth, cinematic motion"
    # gr.Number delivers floats (e.g. 42.0); torch.Generator.manual_seed
    # requires an int, so cast before comparing/seeding.
    seed = int(seed)
    if seed == -1:
        seed = random.randint(0, MAX_SEED)
    generator = torch.Generator(device="cuda").manual_seed(seed)
    # Normalize input dimensions for the pipeline.
    image = resize_image(image)
    # NOTE: no pipe.to("cuda") here -- enable_model_cpu_offload() (set at
    # load time) manages device placement itself, and diffusers rejects
    # .to() on a pipeline that has offloading enabled.
    with torch.inference_mode():
        video_frames = pipe(
            image=image,
            prompt=prompt,
            negative_prompt=negative_prompt,
            num_frames=num_frames,
            guidance_scale=guidance_scale,
            num_inference_steps=num_inference_steps,
            generator=generator,
        ).frames[0]
    # Export to a temp .mp4 kept on disk (delete=False) so Gradio can serve it.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as f:
        export_to_video(video_frames, f.name, fps=8)
        return f.name, seed
# Gradio UI — component declaration order defines the on-screen layout.
with gr.Blocks(title="Video Generator") as demo:
    gr.Markdown("""
    # 🎬 Image to Video Generator
    Upload an image and describe the motion you want. Powered by CogVideoX.
    **Tips:**
    - Use clear, descriptive prompts about motion (e.g., "the person waves hello", "the flower blooms")
    - Keep images simple with clear subjects for best results
    """)
    with gr.Row():
        # Left column: all inputs and the trigger button.
        with gr.Column():
            source_image = gr.Image(type="pil", label="Upload Image")
            motion_prompt = gr.Textbox(
                label="Prompt",
                placeholder="Describe the motion you want...",
                value="Make this image come alive with smooth, cinematic motion",
            )
            neg_prompt_box = gr.Textbox(
                label="Negative Prompt (optional)",
                placeholder="What to avoid...",
                value="blurry, low quality, distorted",
            )
            with gr.Row():
                frames_slider = gr.Slider(
                    minimum=17, maximum=81, value=49, step=8,
                    label="Number of Frames",
                )
                cfg_slider = gr.Slider(
                    minimum=1.0, maximum=15.0, value=6.0, step=0.5,
                    label="Guidance Scale",
                )
            with gr.Row():
                steps_slider = gr.Slider(
                    minimum=20, maximum=100, value=50, step=5,
                    label="Inference Steps",
                )
                seed_box = gr.Number(
                    value=-1, label="Seed (-1 for random)",
                )
            run_button = gr.Button("🎬 Generate Video", variant="primary")
        # Right column: generated video and the seed that produced it.
        with gr.Column():
            result_video = gr.Video(label="Generated Video")
            used_seed = gr.Number(label="Seed Used")
    # Wire the button to the generator; output order matches generate_video's
    # (video_path, seed) return tuple.
    run_button.click(
        fn=generate_video,
        inputs=[
            source_image,
            motion_prompt,
            neg_prompt_box,
            frames_slider,
            cfg_slider,
            steps_slider,
            seed_box,
        ],
        outputs=[result_video, used_seed],
    )

if __name__ == "__main__":
    demo.launch()