# Hugging Face Space: CogVideoX image-to-video Gradio demo.
# Standard library
import os
import random
import tempfile

# Third-party
import gradio as gr
import numpy as np
import spaces
import torch
from PIL import Image

from diffusers import CogVideoXImageToVideoPipeline
from diffusers.utils import export_to_video
# ---- Model configuration -------------------------------------------------
MODEL_ID = "THUDM/CogVideoX-5b-I2V"
MAX_SEED = np.iinfo(np.int32).max

# Build the pipeline once at import time. CPU offload keeps weights on the
# host and streams submodules to the GPU on demand; VAE slicing/tiling trade
# a little speed for a much smaller peak-VRAM footprint during decode.
print("Loading CogVideoX pipeline...")
pipe = CogVideoXImageToVideoPipeline.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
)
pipe.enable_model_cpu_offload()
pipe.vae.enable_slicing()
pipe.vae.enable_tiling()
print("Pipeline loaded!")
def resize_image(image: Image.Image, max_size: int = 720) -> Image.Image:
    """Fit *image* within ``max_size`` and snap its dimensions to multiples of 16.

    Args:
        image: Source image.
        max_size: Maximum allowed width/height in pixels.

    Returns:
        The resized image (or the original object when no resize is needed).
    """
    width, height = image.size

    # Downscale so the longer side equals max_size, preserving aspect ratio.
    if max(width, height) > max_size:
        if width > height:
            width, height = max_size, int(height * max_size / width)
        else:
            width, height = int(width * max_size / height), max_size

    # The video model expects spatial dims divisible by 16. The original code
    # only rounded when a downscale happened, so small images with odd sizes
    # slipped through — round unconditionally, and never collapse below 16.
    new_width = max(16, (width // 16) * 16)
    new_height = max(16, (height // 16) * 16)

    if (new_width, new_height) != image.size:
        image = image.resize((new_width, new_height), Image.LANCZOS)
    return image
def generate_video(
    image: Image.Image,
    prompt: str,
    negative_prompt: str = "",
    num_frames: int = 49,
    guidance_scale: float = 6.0,
    num_inference_steps: int = 50,
    seed: int = -1,
):
    """Generate a short video animating *image* according to *prompt*.

    Args:
        image: Source image to animate.
        prompt: Text description of the desired motion; a default motion
            prompt is substituted when empty.
        negative_prompt: Concepts to steer the sampler away from.
        num_frames: Number of video frames to generate.
        guidance_scale: Classifier-free guidance strength.
        num_inference_steps: Number of denoising steps.
        seed: RNG seed; -1 selects a random seed.

    Returns:
        Tuple of (path to the exported .mp4 file, seed actually used).

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        raise gr.Error("Please upload an image!")
    if not prompt:
        prompt = "Make this image come alive with smooth, cinematic motion"

    # gr.Number delivers floats; torch.Generator.manual_seed requires an int.
    seed = int(seed)
    if seed == -1:
        seed = random.randint(0, MAX_SEED)
    # Use a CPU generator: with enable_model_cpu_offload() the submodules
    # migrate between devices, and a CPU generator is valid either way
    # (a device="cuda" generator would crash on CPU-only hosts).
    generator = torch.Generator().manual_seed(seed)

    # Clamp the image to model-friendly dimensions.
    image = resize_image(image)

    # NOTE: do NOT call pipe.to("cuda") here — it conflicts with the
    # enable_model_cpu_offload() configured at load time, which already
    # moves each submodule to the GPU on demand (recent diffusers versions
    # raise an error for that combination).
    with torch.inference_mode():
        video_frames = pipe(
            image=image,
            prompt=prompt,
            negative_prompt=negative_prompt,
            num_frames=num_frames,
            guidance_scale=guidance_scale,
            num_inference_steps=num_inference_steps,
            generator=generator,
        ).frames[0]

    # delete=False so the file survives for Gradio to serve after we return.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as f:
        export_to_video(video_frames, f.name, fps=8)
    return f.name, seed
# ---- Gradio UI -----------------------------------------------------------
with gr.Blocks(title="Video Generator") as demo:
    gr.Markdown("""
# 🎬 Image to Video Generator
Upload an image and describe the motion you want. Powered by CogVideoX.

**Tips:**
- Use clear, descriptive prompts about motion (e.g., "the person waves hello", "the flower blooms")
- Keep images simple with clear subjects for best results
""")

    with gr.Row():
        # Left column: inputs and generation controls.
        with gr.Column():
            image_input = gr.Image(type="pil", label="Upload Image")
            prompt_input = gr.Textbox(
                label="Prompt",
                placeholder="Describe the motion you want...",
                value="Make this image come alive with smooth, cinematic motion",
            )
            negative_prompt = gr.Textbox(
                label="Negative Prompt (optional)",
                placeholder="What to avoid...",
                value="blurry, low quality, distorted",
            )
            with gr.Row():
                num_frames = gr.Slider(
                    minimum=17,
                    maximum=81,
                    value=49,
                    step=8,
                    label="Number of Frames",
                )
                guidance_scale = gr.Slider(
                    minimum=1.0,
                    maximum=15.0,
                    value=6.0,
                    step=0.5,
                    label="Guidance Scale",
                )
            with gr.Row():
                num_steps = gr.Slider(
                    minimum=20,
                    maximum=100,
                    value=50,
                    step=5,
                    label="Inference Steps",
                )
                seed_input = gr.Number(value=-1, label="Seed (-1 for random)")
            generate_btn = gr.Button("🎬 Generate Video", variant="primary")

        # Right column: outputs.
        with gr.Column():
            video_output = gr.Video(label="Generated Video")
            seed_output = gr.Number(label="Seed Used")

    generate_btn.click(
        fn=generate_video,
        inputs=[
            image_input,
            prompt_input,
            negative_prompt,
            num_frames,
            guidance_scale,
            num_steps,
            seed_input,
        ],
        outputs=[video_output, seed_output],
    )

if __name__ == "__main__":
    demo.launch()