Spaces:

efecelik
/

video-generator

Paused

App Files Files Community

efecelik committed on Jan 19

Commit

c60e9e5

1 Parent(s): 1847d24

Add CogVideoX image-to-video generation with ZeroGPU

Browse files

Files changed (3) hide show

README.md +18 -5
app.py +153 -0
requirements.txt +9 -0

README.md CHANGED Viewed

@@ -1,12 +1,25 @@
 ---
 title: Video Generator
-emoji: ⚡
-colorFrom: pink
-colorTo: pink
 sdk: gradio
-sdk_version: 6.3.0
 app_file: app.py
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
 title: Video Generator
+emoji: 🎬
+colorFrom: purple
+colorTo: blue
 sdk: gradio
+sdk_version: 5.9.0
 app_file: app.py
 pinned: false
+hardware: zero-a10g
 ---
+# Image to Video Generator
+Upload an image and describe the motion you want. Powered by CogVideoX-5B.
+## Features
+- Image-to-video generation
+- Customizable motion prompts
+- Adjustable video length and quality settings
+## Usage
+1. Upload an image
+2. Describe the motion you want
+3. Click Generate!

app.py ADDED Viewed

	@@ -0,0 +1,153 @@

+import spaces
+import torch
+import gradio as gr
+import numpy as np
+import random
+from PIL import Image
+from diffusers import CogVideoXImageToVideoPipeline
+from diffusers.utils import export_to_video
+import tempfile
+import os
+# Model configuration: Hugging Face model id and the upper bound used when a random seed is drawn
+MODEL_ID = "THUDM/CogVideoX-5b-I2V"
+MAX_SEED = np.iinfo(np.int32).max  # largest int32 value; generate_video draws random seeds from [0, MAX_SEED]
+# Load pipeline globally at import time (on CPU first, moved to GPU when needed)
+print("Loading CogVideoX pipeline...")
+pipe = CogVideoXImageToVideoPipeline.from_pretrained(
+    MODEL_ID,
+    torch_dtype=torch.bfloat16,  # load the weights as bfloat16
+)
+pipe.enable_model_cpu_offload()  # NOTE(review): generate_video also calls pipe.to("cuda"); confirm the two are meant to be combined
+pipe.vae.enable_slicing()  # VAE slicing and tiling are both switched on once, before any request is served
+pipe.vae.enable_tiling()
+print("Pipeline loaded!")
+def resize_image(image: Image.Image, max_size: int = 720) -> Image.Image:
+    """Resize image to fit within max_size while maintaining aspect ratio."""
+    width, height = image.size
+    if max(width, height) > max_size:
+        if width > height:
+            new_width = max_size
+            new_height = int(height * max_size / width)
+        else:
+            new_height = max_size
+            new_width = int(width * max_size / height)
+        # Make dimensions divisible by 16
+        new_width = (new_width // 16) * 16
+        new_height = (new_height // 16) * 16
+        image = image.resize((new_width, new_height), Image.LANCZOS)
+    return image
+@spaces.GPU(duration=300)
+def generate_video(
+    image: Image.Image,
+    prompt: str,
+    negative_prompt: str = "",
+    num_frames: int = 49,
+    guidance_scale: float = 6.0,
+    num_inference_steps: int = 50,
+    seed: int = -1,
+):
+    """Generate video from image and prompt."""
+    if image is None:
+        raise gr.Error("Please upload an image!")
+    if not prompt:
+        prompt = "Make this image come alive with smooth, cinematic motion"
+    # Set seed
+    if seed == -1:
+        seed = random.randint(0, MAX_SEED)
+    generator = torch.Generator(device="cuda").manual_seed(seed)
+    # Resize image
+    image = resize_image(image)
+    # Move to GPU and generate
+    pipe.to("cuda")
+    with torch.inference_mode():
+        video_frames = pipe(
+            image=image,
+            prompt=prompt,
+            negative_prompt=negative_prompt,
+            num_frames=num_frames,
+            guidance_scale=guidance_scale,
+            num_inference_steps=num_inference_steps,
+            generator=generator,
+        ).frames[0]
+    # Export to video file
+    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as f:
+        export_to_video(video_frames, f.name, fps=8)
+        return f.name, seed
+# Gradio UI: the first column holds the inputs, the second column shows the generated video and the seed used
+with gr.Blocks(title="Video Generator") as demo:
+    gr.Markdown("""
+    # 🎬 Image to Video Generator
+    Upload an image and describe the motion you want. Powered by CogVideoX.
+    **Tips:**
+    - Use clear, descriptive prompts about motion (e.g., "the person waves hello", "the flower blooms")
+    - Keep images simple with clear subjects for best results
+    """)
+    with gr.Row():
+        with gr.Column():
+            image_input = gr.Image(type="pil", label="Upload Image")  # type="pil": generate_video is annotated to take a PIL image
+            prompt_input = gr.Textbox(
+                label="Prompt",
+                placeholder="Describe the motion you want...",
+                value="Make this image come alive with smooth, cinematic motion"  # same text generate_video falls back to for an empty prompt
+            )
+            negative_prompt = gr.Textbox(
+                label="Negative Prompt (optional)",
+                placeholder="What to avoid...",
+                value="blurry, low quality, distorted"
+            )
+            with gr.Row():
+                num_frames = gr.Slider(
+                    minimum=17, maximum=81, value=49, step=8,  # steps through 17, 25, ..., 81; NOTE(review): confirm the model accepts frame counts other than 49
+                    label="Number of Frames"
+                )
+                guidance_scale = gr.Slider(
+                    minimum=1.0, maximum=15.0, value=6.0, step=0.5,
+                    label="Guidance Scale"
+                )
+            with gr.Row():
+                num_steps = gr.Slider(
+                    minimum=20, maximum=100, value=50, step=5,
+                    label="Inference Steps"
+                )
+                seed_input = gr.Number(
+                    value=-1, label="Seed (-1 for random)"  # -1 is the sentinel generate_video replaces with a random seed
+                )
+            generate_btn = gr.Button("🎬 Generate Video", variant="primary")
+        with gr.Column():
+            video_output = gr.Video(label="Generated Video")
+            seed_output = gr.Number(label="Seed Used")  # receives the second value returned by generate_video
+    generate_btn.click(  # the inputs list follows generate_video's parameter order
+        fn=generate_video,
+        inputs=[image_input, prompt_input, negative_prompt, num_frames, guidance_scale, num_steps, seed_input],
+        outputs=[video_output, seed_output]
+    )
+    gr.Examples(  # a single preset row; its values are listed in the same order as `inputs` below
+        examples=[
+            ["https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/astronaut.jpg",
+             "The astronaut waves at the camera while floating in space", "", 49, 6.0, 50, 42],
+        ],
+        inputs=[image_input, prompt_input, negative_prompt, num_frames, guidance_scale, num_steps, seed_input],
+    )
+if __name__ == "__main__":  # launch the app only when this file is run as a script
+    demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+torch
+diffusers>=0.30.0
+transformers
+accelerate
+sentencepiece
+imageio
+imageio-ffmpeg
+pillow
+numpy