Spaces:
Paused
Paused
Commit
·
2fa4732
1
Parent(s):
068b511
add: demo
Browse files
- app.py +41 -55
- requirements.txt +0 -1
app.py
CHANGED
|
@@ -1,15 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import torch
|
| 3 |
from diffusers.utils import export_to_video
|
| 4 |
-
from cogvideox_interpolation.pipeline import CogVideoXInterpolationPipeline
|
| 5 |
from PIL import Image
|
| 6 |
-
|
| 7 |
-
import
|
| 8 |
|
| 9 |
# Global variable to store the pipeline
|
| 10 |
pipe = None
|
| 11 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 12 |
|
|
|
|
| 13 |
def load_model(model_path):
|
| 14 |
"""Load the CogVideoX-Interpolation model"""
|
| 15 |
global pipe
|
|
@@ -20,10 +23,7 @@ def load_model(model_path):
|
|
| 20 |
# Determine dtype based on model variant
|
| 21 |
dtype = torch.bfloat16 if "5b" in model_path.lower() else torch.float16
|
| 22 |
|
| 23 |
-
pipe = CogVideoXInterpolationPipeline.from_pretrained(
|
| 24 |
-
model_path,
|
| 25 |
-
torch_dtype=dtype
|
| 26 |
-
)
|
| 27 |
|
| 28 |
# Memory optimization
|
| 29 |
if device == "cuda":
|
|
@@ -37,6 +37,7 @@ def load_model(model_path):
|
|
| 37 |
print("Model loaded successfully!")
|
| 38 |
return "✓ Model loaded successfully!"
|
| 39 |
|
|
|
|
| 40 |
def generate_interpolation(
|
| 41 |
first_image,
|
| 42 |
last_image,
|
|
@@ -45,7 +46,7 @@ def generate_interpolation(
|
|
| 45 |
num_inference_steps=50,
|
| 46 |
guidance_scale=6.0,
|
| 47 |
fps=8,
|
| 48 |
-
seed=42
|
| 49 |
):
|
| 50 |
"""Generate interpolated video between two keyframes"""
|
| 51 |
|
|
@@ -66,7 +67,9 @@ def generate_interpolation(
|
|
| 66 |
last_image = Image.fromarray(last_image)
|
| 67 |
|
| 68 |
print(f"Generating video with prompt: {prompt}")
|
| 69 |
-
print(
|
|
|
|
|
|
|
| 70 |
|
| 71 |
# Generate video
|
| 72 |
generator = torch.Generator(device=device).manual_seed(seed)
|
|
@@ -99,9 +102,11 @@ def generate_interpolation(
|
|
| 99 |
print(error_msg)
|
| 100 |
return None, error_msg
|
| 101 |
|
|
|
|
| 102 |
# Create Gradio interface
|
| 103 |
with gr.Blocks(title="CogVideoX Keyframe Interpolation") as demo:
|
| 104 |
-
gr.Markdown(
|
|
|
|
| 105 |
# 🎬 CogVideoX Keyframe Interpolation
|
| 106 |
|
| 107 |
Generate smooth video transitions between two keyframe images using AI.
|
|
@@ -111,7 +116,8 @@ with gr.Blocks(title="CogVideoX Keyframe Interpolation") as demo:
|
|
| 111 |
2. Upload start and end frame images
|
| 112 |
3. Describe the motion/transition in the text prompt
|
| 113 |
4. Adjust parameters and generate!
|
| 114 |
-
"""
|
|
|
|
| 115 |
|
| 116 |
with gr.Row():
|
| 117 |
with gr.Column():
|
|
@@ -119,7 +125,7 @@ with gr.Blocks(title="CogVideoX Keyframe Interpolation") as demo:
|
|
| 119 |
model_path_input = gr.Textbox(
|
| 120 |
label="Model Path",
|
| 121 |
placeholder="e.g., /path/to/CogVideoX-5b-I2V-inter or feizhengcong/CogvideoX-Interpolation",
|
| 122 |
-
value="feizhengcong/CogvideoX-Interpolation"
|
| 123 |
)
|
| 124 |
load_btn = gr.Button("Load Model", variant="primary")
|
| 125 |
model_status = gr.Textbox(label="Status", interactive=False)
|
|
@@ -129,23 +135,15 @@ with gr.Blocks(title="CogVideoX Keyframe Interpolation") as demo:
|
|
| 129 |
with gr.Row():
|
| 130 |
with gr.Column():
|
| 131 |
gr.Markdown("### 🖼️ Input Keyframes")
|
| 132 |
-
first_image_input = gr.Image(
|
| 133 |
-
|
| 134 |
-
type="pil",
|
| 135 |
-
height=300
|
| 136 |
-
)
|
| 137 |
-
last_image_input = gr.Image(
|
| 138 |
-
label="End Frame",
|
| 139 |
-
type="pil",
|
| 140 |
-
height=300
|
| 141 |
-
)
|
| 142 |
|
| 143 |
with gr.Column():
|
| 144 |
gr.Markdown("### ⚙️ Generation Settings")
|
| 145 |
prompt_input = gr.Textbox(
|
| 146 |
label="Motion Description",
|
| 147 |
placeholder="Describe the motion/transition between the frames...",
|
| 148 |
-
lines=4
|
| 149 |
)
|
| 150 |
|
| 151 |
with gr.Row():
|
|
@@ -155,14 +153,10 @@ with gr.Blocks(title="CogVideoX Keyframe Interpolation") as demo:
|
|
| 155 |
maximum=49,
|
| 156 |
step=4,
|
| 157 |
value=49,
|
| 158 |
-
info="Must be 4k+1 format (13, 17, 21, ..., 49)"
|
| 159 |
)
|
| 160 |
fps_slider = gr.Slider(
|
| 161 |
-
label="FPS",
|
| 162 |
-
minimum=4,
|
| 163 |
-
maximum=16,
|
| 164 |
-
step=2,
|
| 165 |
-
value=8
|
| 166 |
)
|
| 167 |
|
| 168 |
with gr.Row():
|
|
@@ -172,7 +166,7 @@ with gr.Blocks(title="CogVideoX Keyframe Interpolation") as demo:
|
|
| 172 |
maximum=100,
|
| 173 |
step=5,
|
| 174 |
value=50,
|
| 175 |
-
info="More steps = better quality but slower"
|
| 176 |
)
|
| 177 |
guidance_slider = gr.Slider(
|
| 178 |
label="Guidance Scale",
|
|
@@ -180,14 +174,10 @@ with gr.Blocks(title="CogVideoX Keyframe Interpolation") as demo:
|
|
| 180 |
maximum=15.0,
|
| 181 |
step=0.5,
|
| 182 |
value=6.0,
|
| 183 |
-
info="Higher = stronger prompt following"
|
| 184 |
)
|
| 185 |
|
| 186 |
-
seed_input = gr.Number(
|
| 187 |
-
label="Random Seed",
|
| 188 |
-
value=42,
|
| 189 |
-
precision=0
|
| 190 |
-
)
|
| 191 |
|
| 192 |
generate_btn = gr.Button("🎬 Generate Video", variant="primary", size="lg")
|
| 193 |
|
|
@@ -204,22 +194,20 @@ with gr.Blocks(title="CogVideoX Keyframe Interpolation") as demo:
|
|
| 204 |
gr.Markdown("### 💡 Example Prompts")
|
| 205 |
gr.Examples(
|
| 206 |
examples=[
|
| 207 |
-
[
|
|
|
|
|
|
|
| 208 |
["The camera smoothly pans from left to right, revealing the scene."],
|
| 209 |
["A dancer gracefully transitions from one pose to another."],
|
| 210 |
["The sun sets gradually, changing the lighting and colors of the scene."],
|
| 211 |
["A car accelerates down the street, moving from standstill to motion."],
|
| 212 |
],
|
| 213 |
inputs=prompt_input,
|
| 214 |
-
label="Click to use example prompts"
|
| 215 |
)
|
| 216 |
|
| 217 |
# Event handlers
|
| 218 |
-
load_btn.click(
|
| 219 |
-
fn=load_model,
|
| 220 |
-
inputs=[model_path_input],
|
| 221 |
-
outputs=[model_status]
|
| 222 |
-
)
|
| 223 |
|
| 224 |
generate_btn.click(
|
| 225 |
fn=generate_interpolation,
|
|
@@ -231,24 +219,22 @@ with gr.Blocks(title="CogVideoX Keyframe Interpolation") as demo:
|
|
| 231 |
num_steps_slider,
|
| 232 |
guidance_slider,
|
| 233 |
fps_slider,
|
| 234 |
-
seed_input
|
| 235 |
],
|
| 236 |
-
outputs=[output_video, generation_status]
|
| 237 |
)
|
| 238 |
|
| 239 |
if __name__ == "__main__":
|
| 240 |
-
print("="*50)
|
| 241 |
print("CogVideoX Keyframe Interpolation Gradio App")
|
| 242 |
-
print("="*50)
|
| 243 |
print(f"Device: {device}")
|
| 244 |
print(f"CUDA available: {torch.cuda.is_available()}")
|
| 245 |
if torch.cuda.is_available():
|
| 246 |
print(f"GPU: {torch.cuda.get_device_name(0)}")
|
| 247 |
-
print(
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
share=False
|
| 254 |
-
)
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import tempfile
|
| 3 |
+
|
| 4 |
import gradio as gr
|
| 5 |
import torch
|
| 6 |
from diffusers.utils import export_to_video
|
|
|
|
| 7 |
from PIL import Image
|
| 8 |
+
|
| 9 |
+
from cogvideox_interpolation.pipeline import CogVideoXInterpolationPipeline
|
| 10 |
|
| 11 |
# Global variable to store the pipeline
|
| 12 |
pipe = None
|
| 13 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 14 |
|
| 15 |
+
|
| 16 |
def load_model(model_path):
|
| 17 |
"""Load the CogVideoX-Interpolation model"""
|
| 18 |
global pipe
|
|
|
|
| 23 |
# Determine dtype based on model variant
|
| 24 |
dtype = torch.bfloat16 if "5b" in model_path.lower() else torch.float16
|
| 25 |
|
| 26 |
+
pipe = CogVideoXInterpolationPipeline.from_pretrained(model_path, torch_dtype=dtype)
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
# Memory optimization
|
| 29 |
if device == "cuda":
|
|
|
|
| 37 |
print("Model loaded successfully!")
|
| 38 |
return "✓ Model loaded successfully!"
|
| 39 |
|
| 40 |
+
|
| 41 |
def generate_interpolation(
|
| 42 |
first_image,
|
| 43 |
last_image,
|
|
|
|
| 46 |
num_inference_steps=50,
|
| 47 |
guidance_scale=6.0,
|
| 48 |
fps=8,
|
| 49 |
+
seed=42,
|
| 50 |
):
|
| 51 |
"""Generate interpolated video between two keyframes"""
|
| 52 |
|
|
|
|
| 67 |
last_image = Image.fromarray(last_image)
|
| 68 |
|
| 69 |
print(f"Generating video with prompt: {prompt}")
|
| 70 |
+
print(
|
| 71 |
+
f"Parameters: frames={num_frames}, steps={num_inference_steps}, guidance={guidance_scale}"
|
| 72 |
+
)
|
| 73 |
|
| 74 |
# Generate video
|
| 75 |
generator = torch.Generator(device=device).manual_seed(seed)
|
|
|
|
| 102 |
print(error_msg)
|
| 103 |
return None, error_msg
|
| 104 |
|
| 105 |
+
|
| 106 |
# Create Gradio interface
|
| 107 |
with gr.Blocks(title="CogVideoX Keyframe Interpolation") as demo:
|
| 108 |
+
gr.Markdown(
|
| 109 |
+
"""
|
| 110 |
# 🎬 CogVideoX Keyframe Interpolation
|
| 111 |
|
| 112 |
Generate smooth video transitions between two keyframe images using AI.
|
|
|
|
| 116 |
2. Upload start and end frame images
|
| 117 |
3. Describe the motion/transition in the text prompt
|
| 118 |
4. Adjust parameters and generate!
|
| 119 |
+
"""
|
| 120 |
+
)
|
| 121 |
|
| 122 |
with gr.Row():
|
| 123 |
with gr.Column():
|
|
|
|
| 125 |
model_path_input = gr.Textbox(
|
| 126 |
label="Model Path",
|
| 127 |
placeholder="e.g., /path/to/CogVideoX-5b-I2V-inter or feizhengcong/CogvideoX-Interpolation",
|
| 128 |
+
value="feizhengcong/CogvideoX-Interpolation",
|
| 129 |
)
|
| 130 |
load_btn = gr.Button("Load Model", variant="primary")
|
| 131 |
model_status = gr.Textbox(label="Status", interactive=False)
|
|
|
|
| 135 |
with gr.Row():
|
| 136 |
with gr.Column():
|
| 137 |
gr.Markdown("### 🖼️ Input Keyframes")
|
| 138 |
+
first_image_input = gr.Image(label="Start Frame", type="pil", height=300)
|
| 139 |
+
last_image_input = gr.Image(label="End Frame", type="pil", height=300)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
|
| 141 |
with gr.Column():
|
| 142 |
gr.Markdown("### ⚙️ Generation Settings")
|
| 143 |
prompt_input = gr.Textbox(
|
| 144 |
label="Motion Description",
|
| 145 |
placeholder="Describe the motion/transition between the frames...",
|
| 146 |
+
lines=4,
|
| 147 |
)
|
| 148 |
|
| 149 |
with gr.Row():
|
|
|
|
| 153 |
maximum=49,
|
| 154 |
step=4,
|
| 155 |
value=49,
|
| 156 |
+
info="Must be 4k+1 format (13, 17, 21, ..., 49)",
|
| 157 |
)
|
| 158 |
fps_slider = gr.Slider(
|
| 159 |
+
label="FPS", minimum=4, maximum=16, step=2, value=8
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
)
|
| 161 |
|
| 162 |
with gr.Row():
|
|
|
|
| 166 |
maximum=100,
|
| 167 |
step=5,
|
| 168 |
value=50,
|
| 169 |
+
info="More steps = better quality but slower",
|
| 170 |
)
|
| 171 |
guidance_slider = gr.Slider(
|
| 172 |
label="Guidance Scale",
|
|
|
|
| 174 |
maximum=15.0,
|
| 175 |
step=0.5,
|
| 176 |
value=6.0,
|
| 177 |
+
info="Higher = stronger prompt following",
|
| 178 |
)
|
| 179 |
|
| 180 |
+
seed_input = gr.Number(label="Random Seed", value=42, precision=0)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 181 |
|
| 182 |
generate_btn = gr.Button("🎬 Generate Video", variant="primary", size="lg")
|
| 183 |
|
|
|
|
| 194 |
gr.Markdown("### 💡 Example Prompts")
|
| 195 |
gr.Examples(
|
| 196 |
examples=[
|
| 197 |
+
[
|
| 198 |
+
"A person walks forward slowly, their body moving naturally with each step."
|
| 199 |
+
],
|
| 200 |
["The camera smoothly pans from left to right, revealing the scene."],
|
| 201 |
["A dancer gracefully transitions from one pose to another."],
|
| 202 |
["The sun sets gradually, changing the lighting and colors of the scene."],
|
| 203 |
["A car accelerates down the street, moving from standstill to motion."],
|
| 204 |
],
|
| 205 |
inputs=prompt_input,
|
| 206 |
+
label="Click to use example prompts",
|
| 207 |
)
|
| 208 |
|
| 209 |
# Event handlers
|
| 210 |
+
load_btn.click(fn=load_model, inputs=[model_path_input], outputs=[model_status])
|
|
|
|
|
|
|
|
|
|
|
|
|
| 211 |
|
| 212 |
generate_btn.click(
|
| 213 |
fn=generate_interpolation,
|
|
|
|
| 219 |
num_steps_slider,
|
| 220 |
guidance_slider,
|
| 221 |
fps_slider,
|
| 222 |
+
seed_input,
|
| 223 |
],
|
| 224 |
+
outputs=[output_video, generation_status],
|
| 225 |
)
|
| 226 |
|
| 227 |
if __name__ == "__main__":
|
| 228 |
+
print("=" * 50)
|
| 229 |
print("CogVideoX Keyframe Interpolation Gradio App")
|
| 230 |
+
print("=" * 50)
|
| 231 |
print(f"Device: {device}")
|
| 232 |
print(f"CUDA available: {torch.cuda.is_available()}")
|
| 233 |
if torch.cuda.is_available():
|
| 234 |
print(f"GPU: {torch.cuda.get_device_name(0)}")
|
| 235 |
+
print(
|
| 236 |
+
f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.2f} GB"
|
| 237 |
+
)
|
| 238 |
+
print("=" * 50)
|
| 239 |
+
|
| 240 |
+
demo.launch()
|
|
|
|
|
|
requirements.txt
CHANGED
|
@@ -1,7 +1,6 @@
|
|
| 1 |
diffusers==0.30.3
|
| 2 |
transformers==4.44.2
|
| 3 |
accelerate==0.34.0
|
| 4 |
-
gradio>=4.0.0
|
| 5 |
torch>=2.0.0
|
| 6 |
torchvision
|
| 7 |
Pillow
|
|
|
|
| 1 |
diffusers==0.30.3
|
| 2 |
transformers==4.44.2
|
| 3 |
accelerate==0.34.0
|
|
|
|
| 4 |
torch>=2.0.0
|
| 5 |
torchvision
|
| 6 |
Pillow
|