efecelik committed on
Commit
c60e9e5
·
1 Parent(s): 1847d24

Add CogVideoX image-to-video generation with ZeroGPU

Browse files
Files changed (3) hide show
  1. README.md +18 -5
  2. app.py +153 -0
  3. requirements.txt +9 -0
README.md CHANGED
@@ -1,12 +1,25 @@
1
  ---
2
  title: Video Generator
3
- emoji:
4
- colorFrom: pink
5
- colorTo: pink
6
  sdk: gradio
7
- sdk_version: 6.3.0
8
  app_file: app.py
9
  pinned: false
 
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  title: Video Generator
3
+ emoji: 🎬
4
+ colorFrom: purple
5
+ colorTo: blue
6
  sdk: gradio
7
+ sdk_version: 5.9.0
8
  app_file: app.py
9
  pinned: false
10
+ hardware: zero-a10g
11
  ---
12
 
13
+ # Image to Video Generator
14
+
15
+ Upload an image and describe the motion you want. Powered by CogVideoX-5B.
16
+
17
+ ## Features
18
+ - Image-to-video generation
19
+ - Customizable motion prompts
20
+ - Adjustable video length and quality settings
21
+
22
+ ## Usage
23
+ 1. Upload an image
24
+ 2. Describe the motion you want
25
+ 3. Click Generate!
app.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import spaces
import torch
import gradio as gr
import numpy as np
import random
from PIL import Image
from diffusers import CogVideoXImageToVideoPipeline
from diffusers.utils import export_to_video
import tempfile
import os

# Model configuration
MODEL_ID = "THUDM/CogVideoX-5b-I2V"  # image-to-video variant of CogVideoX-5B
MAX_SEED = np.iinfo(np.int32).max    # upper bound for randomly drawn seeds

# Load pipeline globally, once, at import time (ZeroGPU Spaces import on CPU;
# the GPU is only attached inside @spaces.GPU-decorated calls).
print("Loading CogVideoX pipeline...")
pipe = CogVideoXImageToVideoPipeline.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
)
# Keep weights on CPU and stream submodules to the GPU on demand.
# NOTE(review): diffusers does not allow calling pipe.to("cuda") on a
# pipeline once model-cpu-offload hooks are installed — verify no later
# code moves the pipeline manually.
pipe.enable_model_cpu_offload()
# VAE slicing/tiling trade decode speed for a much smaller peak-VRAM footprint.
pipe.vae.enable_slicing()
pipe.vae.enable_tiling()
print("Pipeline loaded!")
def resize_image(image: Image.Image, max_size: int = 720) -> Image.Image:
    """Fit *image* within ``max_size`` px and snap its dimensions to /16.

    Aspect ratio is preserved when downscaling. Both output dimensions are
    always rounded down to a multiple of 16 — the original only did this on
    the downscale path, so images already smaller than ``max_size`` could
    keep dimensions the CogVideoX VAE cannot process.

    Args:
        image: Input PIL image.
        max_size: Maximum allowed size (px) for the longest side.

    Returns:
        The resized image, or the original object unchanged if its
        dimensions already satisfy both constraints.
    """
    width, height = image.size
    if max(width, height) > max_size:
        # Scale the longest side down to max_size, preserving aspect ratio.
        if width > height:
            width, height = max_size, int(height * max_size / width)
        else:
            width, height = int(width * max_size / height), max_size
    # Snap to multiples of 16 unconditionally; clamp to at least 16 so a
    # very small input can never collapse to a zero-sized dimension.
    new_width = max((width // 16) * 16, 16)
    new_height = max((height // 16) * 16, 16)
    if (new_width, new_height) != image.size:
        image = image.resize((new_width, new_height), Image.LANCZOS)
    return image
@spaces.GPU(duration=300)
def generate_video(
    image: Image.Image,
    prompt: str,
    negative_prompt: str = "",
    num_frames: int = 49,
    guidance_scale: float = 6.0,
    num_inference_steps: int = 50,
    seed: int = -1,
):
    """Generate a short video from a still image and a motion prompt.

    Args:
        image: Source image to animate (required).
        prompt: Motion description; a generic default is used when empty.
        negative_prompt: Things to steer away from (may be empty).
        num_frames: Number of frames to synthesize.
        guidance_scale: Classifier-free guidance strength.
        num_inference_steps: Denoising steps (more = slower, higher quality).
        seed: RNG seed; -1 draws a random seed in [0, MAX_SEED].

    Returns:
        Tuple of (path to the generated .mp4, seed actually used).

    Raises:
        gr.Error: If no image was uploaded.
    """
    if image is None:
        raise gr.Error("Please upload an image!")

    if not prompt:
        prompt = "Make this image come alive with smooth, cinematic motion"

    # Gradio Number/Slider components deliver floats; manual_seed() and the
    # pipeline's frame/step counts require ints, so cast explicitly.
    seed = int(seed)
    num_frames = int(num_frames)
    num_inference_steps = int(num_inference_steps)

    if seed == -1:
        seed = random.randint(0, MAX_SEED)
    generator = torch.Generator(device="cuda").manual_seed(seed)

    # Clamp the image to the supported size / divisible-by-16 grid.
    image = resize_image(image)

    # Do NOT call pipe.to("cuda") here: enable_model_cpu_offload() (set at
    # load time) installs hooks that manage device placement themselves, and
    # diffusers rejects .to() on an offload-hooked pipeline.
    with torch.inference_mode():
        video_frames = pipe(
            image=image,
            prompt=prompt,
            negative_prompt=negative_prompt,
            num_frames=num_frames,
            guidance_scale=guidance_scale,
            num_inference_steps=num_inference_steps,
            generator=generator,
        ).frames[0]

    # Write frames to a temp .mp4; delete=False so Gradio can serve the file
    # after this function returns.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as f:
        export_to_video(video_frames, f.name, fps=8)
        return f.name, seed
# ---------------------------------------------------------------------------
# Gradio UI: two-column layout — image + prompt + sampling controls on the
# left, the rendered video (and the seed actually used) on the right.
# ---------------------------------------------------------------------------
with gr.Blocks(title="Video Generator") as demo:
    gr.Markdown("""
    # 🎬 Image to Video Generator

    Upload an image and describe the motion you want. Powered by CogVideoX.

    **Tips:**
    - Use clear, descriptive prompts about motion (e.g., "the person waves hello", "the flower blooms")
    - Keep images simple with clear subjects for best results
    """)

    with gr.Row():
        with gr.Column():
            # Conditioning inputs: source image plus positive/negative prompts.
            image_input = gr.Image(type="pil", label="Upload Image")
            prompt_input = gr.Textbox(
                label="Prompt",
                placeholder="Describe the motion you want...",
                value="Make this image come alive with smooth, cinematic motion"
            )
            negative_prompt = gr.Textbox(
                label="Negative Prompt (optional)",
                placeholder="What to avoid...",
                value="blurry, low quality, distorted"
            )

            with gr.Row():
                # step=8 keeps frame counts on the 17, 25, ..., 81 grid.
                # NOTE(review): presumably matching CogVideoX's supported
                # frame counts — confirm against the model card.
                num_frames = gr.Slider(
                    minimum=17, maximum=81, value=49, step=8,
                    label="Number of Frames"
                )
                guidance_scale = gr.Slider(
                    minimum=1.0, maximum=15.0, value=6.0, step=0.5,
                    label="Guidance Scale"
                )

            with gr.Row():
                num_steps = gr.Slider(
                    minimum=20, maximum=100, value=50, step=5,
                    label="Inference Steps"
                )
                # -1 sentinel means "pick a random seed" (see generate_video).
                seed_input = gr.Number(
                    value=-1, label="Seed (-1 for random)"
                )

            generate_btn = gr.Button("🎬 Generate Video", variant="primary")

        with gr.Column():
            # Outputs: generated clip plus the seed used (for reproducibility).
            video_output = gr.Video(label="Generated Video")
            seed_output = gr.Number(label="Seed Used")

    # Wire the button to the GPU-decorated generator.
    generate_btn.click(
        fn=generate_video,
        inputs=[image_input, prompt_input, negative_prompt, num_frames, guidance_scale, num_steps, seed_input],
        outputs=[video_output, seed_output]
    )

    # One canned example: remote image URL + prompt + default sampler settings.
    gr.Examples(
        examples=[
            ["https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/astronaut.jpg",
             "The astronaut waves at the camera while floating in space", "", 49, 6.0, 50, 42],
        ],
        inputs=[image_input, prompt_input, negative_prompt, num_frames, guidance_scale, num_steps, seed_input],
    )

if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ torch
2
+ diffusers>=0.30.0
3
+ transformers
4
+ accelerate
5
+ sentencepiece
6
+ imageio
7
+ imageio-ffmpeg
8
+ pillow
9
+ numpy