# NOTE(review): the lines here were Hugging Face Spaces page residue
# ("Spaces: / Paused / Paused" — the Space's status header), not code.
# Replaced with this comment so the file parses as Python.
| import os | |
| import tempfile | |
| import gradio as gr | |
| import numpy as np | |
| import spaces | |
| import torch | |
| from diffusers.utils import export_to_video | |
| from PIL import Image | |
| from cogvideox_interpolation.pipeline import CogVideoXInterpolationPipeline | |
# Load model globally at startup so a single pipeline instance is shared
# across all requests instead of being re-loaded per generation.
print("Loading CogVideoX-Interpolation model...")

# Hugging Face Hub repo id of the keyframe-interpolation checkpoint.
MODEL_PATH = "feizhengcong/CogvideoX-Interpolation"
# fp16 halves weight memory vs fp32; the pipeline is only moved to CUDA
# inside the request handler, so startup stays CPU/host-memory only.
dtype = torch.float16
pipe = CogVideoXInterpolationPipeline.from_pretrained(
    MODEL_PATH,
    torch_dtype=dtype
)
# Tiled + sliced VAE decoding: slower decode, much lower peak VRAM.
pipe.vae.enable_tiling()
pipe.vae.enable_slicing()
print("Model loaded successfully!")
# ZeroGPU: `spaces` was imported and the in-body comment referred to "the
# GPU-decorated function", but the decorator itself was missing — without it
# pipe.to("cuda") has no GPU allocated on Spaces ZeroGPU hardware.
@spaces.GPU(duration=120)
def generate_interpolation(
    first_image,
    last_image,
    prompt,
    num_frames=49,
    num_inference_steps=50,
    guidance_scale=6.0,
    fps=8,
    seed=42,
):
    """Generate an interpolated video between two keyframe images.

    Args:
        first_image: Start frame (PIL Image or numpy array from Gradio).
        last_image: End frame (PIL Image or numpy array from Gradio).
        prompt: Text description of the motion/transition.
        num_frames: Number of frames to generate (model expects 4k+1).
        num_inference_steps: Diffusion sampling steps.
        guidance_scale: Classifier-free guidance strength.
        fps: Frame rate of the exported mp4.
        seed: RNG seed for reproducible generation.

    Returns:
        (video_path, status_message); video_path is None on validation
        failure or error, with the reason in status_message.
    """
    if first_image is None or last_image is None:
        return None, "⚠️ Please upload both start and end frame images!"
    # Guard `None` as well as whitespace-only text: a cleared gr.Textbox can
    # deliver None, and the original `prompt.strip()` would then raise.
    if not prompt or not prompt.strip():
        return None, "⚠️ Please provide a text prompt describing the motion!"
    try:
        # Convert numpy arrays to PIL Images if needed
        if not isinstance(first_image, Image.Image):
            first_image = Image.fromarray(first_image)
        if not isinstance(last_image, Image.Image):
            last_image = Image.fromarray(last_image)
        print(f"Generating video with prompt: {prompt}")
        print(
            f"Parameters: frames={num_frames}, steps={num_inference_steps}, guidance={guidance_scale}"
        )
        # Move pipeline to CUDA within the GPU-decorated function
        pipe.to("cuda")
        # torch.Generator.manual_seed requires an int; Gradio number inputs
        # can deliver floats, so cast defensively.
        generator = torch.Generator(device="cuda").manual_seed(int(seed))
        video = pipe(
            prompt=prompt,
            first_image=first_image,
            last_image=last_image,
            num_videos_per_prompt=1,
            num_inference_steps=int(num_inference_steps),
            num_frames=int(num_frames),
            guidance_scale=guidance_scale,
            generator=generator,
        )[0]
        # Export to a temporary file; delete=False so Gradio can serve it
        # after the handle is closed.
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
        output_path = temp_file.name
        temp_file.close()
        export_to_video(video[0], output_path, fps=fps)
        status = f"✓ Video generated successfully! ({num_frames} frames at {fps} fps)"
        print(status)
        return output_path, status
    except Exception as e:
        # Broad catch is deliberate at this UI boundary: surface the error to
        # the user instead of crashing the Space, and log the traceback.
        error_msg = f"❌ Error: {str(e)}"
        print(error_msg)
        import traceback
        traceback.print_exc()
        return None, error_msg
# Create Gradio interface: two keyframe inputs + prompt/sampling controls on
# top, generated video + status below, wired to generate_interpolation.
with gr.Blocks(title="CogVideoX Keyframe Interpolation") as demo:
    # Page header and usage instructions.
    gr.Markdown(
        """
        # 🎬 CogVideoX Keyframe Interpolation
        Generate smooth video transitions between two keyframe images using AI.
        **Instructions:**
        1. Upload start and end frame images
        2. Describe the motion/transition in the text prompt
        3. Adjust parameters and generate!
        """
    )
    with gr.Row():
        # Left column: the two keyframes to interpolate between.
        with gr.Column():
            gr.Markdown("### 🖼️ Input Keyframes")
            first_image_input = gr.Image(label="Start Frame", type="pil", height=300)
            last_image_input = gr.Image(label="End Frame", type="pil", height=300)
        # Right column: motion prompt and sampling parameters.
        with gr.Column():
            gr.Markdown("### ⚙️ Generation Settings")
            prompt_input = gr.Textbox(
                label="Motion Description",
                placeholder="Describe the motion/transition between the frames...",
                lines=4,
            )
            with gr.Row():
                num_frames_slider = gr.Slider(
                    label="Number of Frames",
                    minimum=13,
                    # step=4 from minimum 13 keeps values on the 4k+1 grid
                    # (13, 17, ..., 49) stated in `info` below.
                    maximum=49,
                    step=4,
                    value=49,
                    info="Must be 4k+1 format (13, 17, 21, ..., 49)",
                )
                fps_slider = gr.Slider(
                    label="FPS", minimum=4, maximum=16, step=2, value=8
                )
            with gr.Row():
                num_steps_slider = gr.Slider(
                    label="Inference Steps",
                    minimum=20,
                    maximum=100,
                    step=5,
                    value=50,
                    info="More steps = better quality but slower",
                )
                guidance_slider = gr.Slider(
                    label="Guidance Scale",
                    minimum=1.0,
                    maximum=15.0,
                    step=0.5,
                    value=6.0,
                    info="Higher = stronger prompt following",
                )
            # precision=0 makes Gradio return the seed as an integer.
            seed_input = gr.Number(label="Random Seed", value=42, precision=0)
    # NOTE(review): original indentation was lost in this paste; the button is
    # placed at Blocks level (full width, below both columns) — confirm layout.
    generate_btn = gr.Button("🎬 Generate Video", variant="primary", size="lg")
    gr.Markdown("---")
    with gr.Row():
        with gr.Column():
            gr.Markdown("### 🎥 Generated Video")
            output_video = gr.Video(label="Output")
            generation_status = gr.Textbox(label="Generation Status", interactive=False)
    # Examples
    gr.Markdown("---")
    gr.Markdown("### 💡 Example Prompts")
    gr.Examples(
        examples=[
            [
                "A person walks forward slowly, their body moving naturally with each step."
            ],
            ["The camera smoothly pans from left to right, revealing the scene."],
            ["A dancer gracefully transitions from one pose to another."],
            ["The sun sets gradually, changing the lighting and colors of the scene."],
            ["A car accelerates down the street, moving from standstill to motion."],
        ],
        inputs=prompt_input,
        label="Click to use example prompts",
    )
    # Event handlers
    # Input order must match generate_interpolation's parameter order:
    # images, prompt, num_frames, steps, guidance, fps, seed.
    generate_btn.click(
        fn=generate_interpolation,
        inputs=[
            first_image_input,
            last_image_input,
            prompt_input,
            num_frames_slider,
            num_steps_slider,
            guidance_slider,
            fps_slider,
            seed_input,
        ],
        outputs=[output_video, generation_status],
    )

# Launch only when run as a script (Spaces runners import the module instead).
if __name__ == "__main__":
    demo.launch()