"""Gradio demo for CogVideoX keyframe interpolation.

Loads the CogVideoX-Interpolation pipeline once at import time and exposes a
small web UI that generates a video transitioning between two keyframes.
"""

import os
import tempfile
import traceback

import gradio as gr
import numpy as np
import spaces
import torch
from diffusers.utils import export_to_video
from PIL import Image

from cogvideox_interpolation.pipeline import CogVideoXInterpolationPipeline

# Load model globally at startup
print("Loading CogVideoX-Interpolation model...")
MODEL_PATH = "feizhengcong/CogvideoX-Interpolation"
dtype = torch.float16

pipe = CogVideoXInterpolationPipeline.from_pretrained(
    MODEL_PATH, torch_dtype=dtype
)
pipe.vae.enable_tiling()
pipe.vae.enable_slicing()
print("Model loaded successfully!")


def _as_pil_image(image):
    """Return *image* as a PIL image, converting via ``Image.fromarray`` if needed."""
    if isinstance(image, Image.Image):
        return image
    return Image.fromarray(image)


@spaces.GPU(duration=300)
def generate_interpolation(
    first_image,
    last_image,
    prompt,
    num_frames=49,
    num_inference_steps=50,
    guidance_scale=6.0,
    fps=8,
    seed=42,
):
    """Generate an interpolated video between two keyframes.

    Args:
        first_image: Start frame, either a PIL image or an array accepted by
            ``Image.fromarray``.
        last_image: End frame, same accepted types as ``first_image``.
        prompt: Text description of the motion between the two frames.
        num_frames: Number of frames to generate.
        num_inference_steps: Number of denoising steps passed to the pipeline.
        guidance_scale: Guidance scale passed to the pipeline.
        fps: Frame rate used when exporting the video file.
        seed: Seed for the CUDA random generator.

    Returns:
        A ``(video_path, status_message)`` tuple. ``video_path`` is the path
        of the exported ``.mp4`` file, or ``None`` when validation or
        generation failed; ``status_message`` always describes the outcome.
    """
    if first_image is None or last_image is None:
        return None, "⚠️ Please upload both start and end frame images!"

    # The prompt may be None (e.g. programmatic callers), so guard before
    # calling .strip() to avoid an AttributeError outside the try block.
    if not prompt or not prompt.strip():
        return None, "⚠️ Please provide a text prompt describing the motion!"

    # A cleared number field yields None, which would otherwise surface as a
    # cryptic TypeError from torch.Generator.manual_seed.
    if seed is None:
        return None, "⚠️ Please provide a random seed!"

    try:
        # UI widgets may deliver numeric values as floats; the pipeline, the
        # generator seed and the video exporter expect integers.
        num_frames = int(num_frames)
        num_inference_steps = int(num_inference_steps)
        guidance_scale = float(guidance_scale)
        fps = int(fps)
        seed = int(seed)

        # Convert numpy arrays to PIL Images if needed
        first_image = _as_pil_image(first_image)
        last_image = _as_pil_image(last_image)

        print(f"Generating video with prompt: {prompt}")
        print(
            f"Parameters: frames={num_frames}, steps={num_inference_steps}, guidance={guidance_scale}"
        )

        # Move pipeline to CUDA within the GPU-decorated function
        pipe.to("cuda")

        # Generate video
        generator = torch.Generator(device="cuda").manual_seed(seed)
        video = pipe(
            prompt=prompt,
            first_image=first_image,
            last_image=last_image,
            num_videos_per_prompt=1,
            num_inference_steps=num_inference_steps,
            num_frames=num_frames,
            guidance_scale=guidance_scale,
            generator=generator,
        )[0]

        # Reserve a temporary file name; delete=False keeps the file on disk
        # after the handle is closed so the exporter and the UI can use it.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_file:
            output_path = temp_file.name

        export_to_video(video[0], output_path, fps=fps)

        status = f"✓ Video generated successfully! ({num_frames} frames at {fps} fps)"
        print(status)
        return output_path, status

    except Exception as e:
        # Top-level UI boundary: log the full traceback and report the error
        # to the user instead of crashing the request.
        error_msg = f"❌ Error: {e}"
        print(error_msg)
        traceback.print_exc()
        return None, error_msg


# Create Gradio interface
with gr.Blocks(title="CogVideoX Keyframe Interpolation") as demo:
    gr.Markdown(
        """
        # 🎬 CogVideoX Keyframe Interpolation

        Generate smooth video transitions between two keyframe images using AI.

        **Instructions:**
        1. Upload start and end frame images
        2. Describe the motion/transition in the text prompt
        3. Adjust parameters and generate!
        """
    )

    with gr.Row():
        with gr.Column():
            gr.Markdown("### 🖼️ Input Keyframes")
            first_image_input = gr.Image(label="Start Frame", type="pil", height=300)
            last_image_input = gr.Image(label="End Frame", type="pil", height=300)

        with gr.Column():
            gr.Markdown("### ⚙️ Generation Settings")
            prompt_input = gr.Textbox(
                label="Motion Description",
                placeholder="Describe the motion/transition between the frames...",
                lines=4,
            )

            with gr.Row():
                num_frames_slider = gr.Slider(
                    label="Number of Frames",
                    minimum=13,
                    maximum=49,
                    step=4,
                    value=49,
                    info="Must be 4k+1 format (13, 17, 21, ..., 49)",
                )
                fps_slider = gr.Slider(
                    label="FPS", minimum=4, maximum=16, step=2, value=8
                )

            with gr.Row():
                num_steps_slider = gr.Slider(
                    label="Inference Steps",
                    minimum=20,
                    maximum=100,
                    step=5,
                    value=50,
                    info="More steps = better quality but slower",
                )
                guidance_slider = gr.Slider(
                    label="Guidance Scale",
                    minimum=1.0,
                    maximum=15.0,
                    step=0.5,
                    value=6.0,
                    info="Higher = stronger prompt following",
                )

            seed_input = gr.Number(label="Random Seed", value=42, precision=0)

            generate_btn = gr.Button("🎬 Generate Video", variant="primary", size="lg")

    gr.Markdown("---")

    with gr.Row():
        with gr.Column():
            gr.Markdown("### 🎥 Generated Video")
            output_video = gr.Video(label="Output")
            generation_status = gr.Textbox(label="Generation Status", interactive=False)

    # Examples
    gr.Markdown("---")
    gr.Markdown("### 💡 Example Prompts")
    gr.Examples(
        examples=[
            [
                "A person walks forward slowly, their body moving naturally with each step."
            ],
            ["The camera smoothly pans from left to right, revealing the scene."],
            ["A dancer gracefully transitions from one pose to another."],
            ["The sun sets gradually, changing the lighting and colors of the scene."],
            ["A car accelerates down the street, moving from standstill to motion."],
        ],
        inputs=prompt_input,
        label="Click to use example prompts",
    )

    # Event handlers
    generate_btn.click(
        fn=generate_interpolation,
        inputs=[
            first_image_input,
            last_image_input,
            prompt_input,
            num_frames_slider,
            num_steps_slider,
            guidance_slider,
            fps_slider,
            seed_input,
        ],
        outputs=[output_video, generation_status],
    )

if __name__ == "__main__":
    demo.launch()