# Hugging Face Space: CogVideoX image-to-video Gradio demo.
# Standard library
import os
import random
import tempfile

# Third-party
import gradio as gr
import numpy as np
import spaces
import torch
from PIL import Image

from diffusers import CogVideoXImageToVideoPipeline
from diffusers.utils import export_to_video
# ---- Model configuration -------------------------------------------------
MODEL_ID = "THUDM/CogVideoX-5b-I2V"
MAX_SEED = np.iinfo(np.int32).max

# Build the pipeline once at import time. CPU offload keeps weights on the
# host and streams submodules to the GPU on demand; VAE slicing/tiling trade
# a little speed for a much smaller peak-VRAM footprint during decode.
print("Loading CogVideoX pipeline...")
pipe = CogVideoXImageToVideoPipeline.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
)
pipe.enable_model_cpu_offload()
pipe.vae.enable_slicing()
pipe.vae.enable_tiling()
print("Pipeline loaded!")
def resize_image(image: Image.Image, max_size: int = 720) -> Image.Image:
    """Fit *image* within ``max_size`` and snap its dimensions to multiples of 16.

    Args:
        image: Source image.
        max_size: Maximum allowed width/height in pixels.

    Returns:
        The resized image (or the original object when no resize is needed).
    """
    width, height = image.size

    # Downscale so the longer side equals max_size, preserving aspect ratio.
    if max(width, height) > max_size:
        if width > height:
            width, height = max_size, int(height * max_size / width)
        else:
            width, height = int(width * max_size / height), max_size

    # The video model expects spatial dims divisible by 16. The original code
    # only rounded when a downscale happened, so small images with odd sizes
    # slipped through — round unconditionally, and never collapse below 16.
    new_width = max(16, (width // 16) * 16)
    new_height = max(16, (height // 16) * 16)

    if (new_width, new_height) != image.size:
        image = image.resize((new_width, new_height), Image.LANCZOS)
    return image
def generate_video(
    image: Image.Image,
    prompt: str,
    negative_prompt: str = "",
    num_frames: int = 49,
    guidance_scale: float = 6.0,
    num_inference_steps: int = 50,
    seed: int = -1,
):
    """Generate a short video animating *image* according to *prompt*.

    Args:
        image: Source image to animate.
        prompt: Text description of the desired motion; a default motion
            prompt is substituted when empty.
        negative_prompt: Concepts to steer the sampler away from.
        num_frames: Number of video frames to generate.
        guidance_scale: Classifier-free guidance strength.
        num_inference_steps: Number of denoising steps.
        seed: RNG seed; -1 selects a random seed.

    Returns:
        Tuple of (path to the exported .mp4 file, seed actually used).

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        raise gr.Error("Please upload an image!")
    if not prompt:
        prompt = "Make this image come alive with smooth, cinematic motion"

    # gr.Number delivers floats; torch.Generator.manual_seed requires an int.
    seed = int(seed)
    if seed == -1:
        seed = random.randint(0, MAX_SEED)
    # Use a CPU generator: with enable_model_cpu_offload() the submodules
    # migrate between devices, and a CPU generator is valid either way
    # (a device="cuda" generator would crash on CPU-only hosts).
    generator = torch.Generator().manual_seed(seed)

    # Clamp the image to model-friendly dimensions.
    image = resize_image(image)

    # NOTE: do NOT call pipe.to("cuda") here — it conflicts with the
    # enable_model_cpu_offload() configured at load time, which already
    # moves each submodule to the GPU on demand (recent diffusers versions
    # raise an error for that combination).
    with torch.inference_mode():
        video_frames = pipe(
            image=image,
            prompt=prompt,
            negative_prompt=negative_prompt,
            num_frames=num_frames,
            guidance_scale=guidance_scale,
            num_inference_steps=num_inference_steps,
            generator=generator,
        ).frames[0]

    # delete=False so the file survives for Gradio to serve after we return.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as f:
        export_to_video(video_frames, f.name, fps=8)
    return f.name, seed
# ---- Gradio UI -----------------------------------------------------------
with gr.Blocks(title="Video Generator") as demo:
    gr.Markdown("""
# 🎬 Image to Video Generator
Upload an image and describe the motion you want. Powered by CogVideoX.

**Tips:**
- Use clear, descriptive prompts about motion (e.g., "the person waves hello", "the flower blooms")
- Keep images simple with clear subjects for best results
""")

    with gr.Row():
        # Left column: inputs and generation controls.
        with gr.Column():
            image_input = gr.Image(type="pil", label="Upload Image")
            prompt_input = gr.Textbox(
                label="Prompt",
                placeholder="Describe the motion you want...",
                value="Make this image come alive with smooth, cinematic motion",
            )
            negative_prompt = gr.Textbox(
                label="Negative Prompt (optional)",
                placeholder="What to avoid...",
                value="blurry, low quality, distorted",
            )
            with gr.Row():
                num_frames = gr.Slider(
                    minimum=17,
                    maximum=81,
                    value=49,
                    step=8,
                    label="Number of Frames",
                )
                guidance_scale = gr.Slider(
                    minimum=1.0,
                    maximum=15.0,
                    value=6.0,
                    step=0.5,
                    label="Guidance Scale",
                )
            with gr.Row():
                num_steps = gr.Slider(
                    minimum=20,
                    maximum=100,
                    value=50,
                    step=5,
                    label="Inference Steps",
                )
                seed_input = gr.Number(value=-1, label="Seed (-1 for random)")
            generate_btn = gr.Button("🎬 Generate Video", variant="primary")

        # Right column: outputs.
        with gr.Column():
            video_output = gr.Video(label="Generated Video")
            seed_output = gr.Number(label="Seed Used")

    generate_btn.click(
        fn=generate_video,
        inputs=[
            image_input,
            prompt_input,
            negative_prompt,
            num_frames,
            guidance_scale,
            num_steps,
            seed_input,
        ],
        outputs=[video_output, seed_output],
    )

if __name__ == "__main__":
    demo.launch()