# CogVideoX image-to-video Gradio Space.
# (Removed scraped page chrome that preceded the code: "Spaces / Paused /
# File size: 4,613 Bytes / c60e9e5 / 1 2 3 ..." were HF viewer artifacts,
# not part of the program, and broke Python parsing.)
import spaces
import torch
import gradio as gr
import numpy as np
import random
from PIL import Image
from diffusers import CogVideoXImageToVideoPipeline
from diffusers.utils import export_to_video
import tempfile
import os
# Model configuration
MODEL_ID = "THUDM/CogVideoX-5b-I2V"
# Largest value accepted when drawing a random seed (fits torch's int32 seeds).
MAX_SEED = np.iinfo(np.int32).max

# Load pipeline globally (on CPU first, moved to GPU when needed).
# Runs once at import time so every request reuses the same weights.
print("Loading CogVideoX pipeline...")
pipe = CogVideoXImageToVideoPipeline.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,  # bf16 weights to cut memory vs fp32
)
# Offload sub-modules to CPU between uses; diffusers moves each module to the
# GPU on demand, so the pipeline must NOT also be moved with pipe.to("cuda").
pipe.enable_model_cpu_offload()
# Slicing/tiling trade a little speed for much lower peak VRAM in the VAE.
pipe.vae.enable_slicing()
pipe.vae.enable_tiling()
print("Pipeline loaded!")
def resize_image(image: Image.Image, max_size: int = 720) -> Image.Image:
"""Resize image to fit within max_size while maintaining aspect ratio."""
width, height = image.size
if max(width, height) > max_size:
if width > height:
new_width = max_size
new_height = int(height * max_size / width)
else:
new_height = max_size
new_width = int(width * max_size / height)
# Make dimensions divisible by 16
new_width = (new_width // 16) * 16
new_height = (new_height // 16) * 16
image = image.resize((new_width, new_height), Image.LANCZOS)
return image
@spaces.GPU(duration=300)
def generate_video(
    image: Image.Image,
    prompt: str,
    negative_prompt: str = "",
    num_frames: int = 49,
    guidance_scale: float = 6.0,
    num_inference_steps: int = 50,
    seed: int = -1,
):
    """Generate a short video from an image and a motion prompt.

    Args:
        image: Uploaded source frame (required; ``gr.Error`` if missing).
        prompt: Motion description; a default is substituted when empty.
        negative_prompt: Concepts to steer away from.
        num_frames: Frames to synthesize (sliders deliver floats; cast below).
        guidance_scale: Classifier-free guidance strength.
        num_inference_steps: Denoising steps.
        seed: RNG seed; -1 draws a random one.

    Returns:
        Tuple of (path to the exported .mp4, seed actually used).

    Raises:
        gr.Error: When no image was provided.
    """
    if image is None:
        raise gr.Error("Please upload an image!")
    if not prompt:
        prompt = "Make this image come alive with smooth, cinematic motion"

    # gr.Number hands us a float unless precision=0 is set; torch's
    # manual_seed requires an int, so cast before comparing/seeding.
    seed = int(seed)
    if seed == -1:
        seed = random.randint(0, MAX_SEED)
    generator = torch.Generator(device="cuda").manual_seed(seed)

    # Clamp image to the model-friendly size (longest side <= 720, /16).
    image = resize_image(image)

    # NOTE: deliberately NOT calling pipe.to("cuda") here.  The pipeline was
    # configured with enable_model_cpu_offload(), which manages device
    # placement itself; combining it with an explicit .to("cuda") raises an
    # error in recent diffusers versions.
    with torch.inference_mode():
        video_frames = pipe(
            image=image,
            prompt=prompt,
            negative_prompt=negative_prompt,
            num_frames=int(num_frames),  # sliders may deliver floats
            guidance_scale=guidance_scale,
            num_inference_steps=int(num_inference_steps),
            generator=generator,
        ).frames[0]

    # delete=False: Gradio serves the file after this function returns, so it
    # must outlive the context manager. (Spaces' ephemeral /tmp reclaims it.)
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as f:
        export_to_video(video_frames, f.name, fps=8)
        return f.name, seed
# Gradio UI -- widget creation order determines on-page layout.
with gr.Blocks(title="Video Generator") as demo:
    gr.Markdown("""
    # 🎬 Image to Video Generator
    Upload an image and describe the motion you want. Powered by CogVideoX.
    **Tips:**
    - Use clear, descriptive prompts about motion (e.g., "the person waves hello", "the flower blooms")
    - Keep images simple with clear subjects for best results
    """)
    with gr.Row():
        # Left column: all inputs and the trigger button.
        with gr.Column():
            image_input = gr.Image(type="pil", label="Upload Image")
            prompt_input = gr.Textbox(
                label="Prompt",
                placeholder="Describe the motion you want...",
                value="Make this image come alive with smooth, cinematic motion"
            )
            negative_prompt = gr.Textbox(
                label="Negative Prompt (optional)",
                placeholder="What to avoid...",
                value="blurry, low quality, distorted"
            )
            with gr.Row():
                # step=8 keeps frame counts on the model's supported grid
                # (17..81) -- presumably a CogVideoX constraint; confirm.
                num_frames = gr.Slider(
                    minimum=17, maximum=81, value=49, step=8,
                    label="Number of Frames"
                )
                guidance_scale = gr.Slider(
                    minimum=1.0, maximum=15.0, value=6.0, step=0.5,
                    label="Guidance Scale"
                )
            with gr.Row():
                num_steps = gr.Slider(
                    minimum=20, maximum=100, value=50, step=5,
                    label="Inference Steps"
                )
                # -1 sentinel asks generate_video to draw a random seed.
                seed_input = gr.Number(
                    value=-1, label="Seed (-1 for random)"
                )
            generate_btn = gr.Button("🎬 Generate Video", variant="primary")
        # Right column: outputs (video file plus the seed actually used,
        # so runs can be reproduced).
        with gr.Column():
            video_output = gr.Video(label="Generated Video")
            seed_output = gr.Number(label="Seed Used")
    # Input order must match generate_video's positional parameters.
    generate_btn.click(
        fn=generate_video,
        inputs=[image_input, prompt_input, negative_prompt, num_frames, guidance_scale, num_steps, seed_input],
        outputs=[video_output, seed_output]
    )

if __name__ == "__main__":
    demo.launch()