"""Hugging Face Space: Wan 2.2 vertical (9:16) image-to-video generator.

Loads the Wan2.2-I2V-A14B diffusers pipeline once at import time (required
for ZeroGPU Spaces), exposes a single Gradio UI that turns an uploaded image
plus a motion prompt into a ~10-second vertical MP4.
"""

import tempfile

import spaces
import gradio as gr
import torch
import numpy as np
from diffusers import WanImageToVideoPipeline
from diffusers.utils import export_to_video

model_id = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"
print(f"Using video Model: {model_id}")

dtype = torch.bfloat16
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load pipeline at import time so the ZeroGPU-decorated handler only has to
# move work to the GPU, not download/initialize weights per request.
pipe = WanImageToVideoPipeline.from_pretrained(
    model_id,
    torch_dtype=dtype
)
pipe.to(device)
print(f"Model Loaded in {device}")

# Tiled VAE decoding keeps peak VRAM manageable when decoding 161 frames.
pipe.vae.enable_tiling()


# ================================
# Image Preparation
# ================================
def prepare_vertical_image(pipe, image, base_width=384, base_height=672):
    """Resize *image* to a pipeline-compatible vertical resolution.

    Both dimensions are floored to the nearest multiple of the pipeline's
    spatial granularity (VAE scale factor x transformer patch size), which
    the model requires.

    Args:
        pipe: The loaded ``WanImageToVideoPipeline``.
        image: A PIL image to resize (aspect ratio is NOT preserved).
        base_width: Target width before rounding. Defaults to 384.
        base_height: Target height before rounding. Defaults to 672.

    Returns:
        Tuple of ``(resized_image, final_width, final_height)``.
    """
    mod_value = (
        pipe.vae_scale_factor_spatial * pipe.transformer.config.patch_size[1]
    )
    # Floor to a multiple of mod_value, but never below one full mod_value —
    # a base dimension smaller than mod_value would otherwise floor to 0
    # and produce an invalid 0-sized resize.
    final_width = max(mod_value, (base_width // mod_value) * mod_value)
    final_height = max(mod_value, (base_height // mod_value) * mod_value)
    resized_image = image.resize((final_width, final_height))
    return resized_image, final_width, final_height


# ================================
# Video Generation
# ================================
@spaces.GPU(size="xlarge", duration=180)
def generate_video(input_image, prompt, negative_prompt, progress=gr.Progress(track_tqdm=True)):
    """Generate a ~10-second vertical video from an image and a prompt.

    Args:
        input_image: PIL image uploaded by the user (may be None).
        prompt: Text describing desired motion / camera movement.
        negative_prompt: Text describing artifacts to avoid.
        progress: Gradio progress tracker (mirrors the pipeline's tqdm bars).

    Returns:
        Path to the exported MP4 file.

    Raises:
        gr.Error: If no image was uploaded.
    """
    if input_image is None:
        # Surface a visible error in the UI instead of silently producing
        # no output (the original returned None, which looks like a hang).
        raise gr.Error("Please upload an image first.")

    image, width, height = prepare_vertical_image(pipe, input_image)
    print(f"Generating vertical video {width}x{height}")

    video_frames = pipe(
        image=image,
        prompt=prompt,
        negative_prompt=negative_prompt,
        height=height,
        width=width,
        num_frames=161,  # 161 frames @ 16 fps ≈ 10 seconds
        guidance_scale=5.0,
        num_inference_steps=15
    ).frames[0]

    # Unique temp file per request: a fixed filename would let concurrent
    # users on a shared Space overwrite each other's output.
    output_path = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
    export_to_video(video_frames, output_path, fps=16)
    return output_path


# ================================
# Gradio UI
# ================================
with gr.Blocks(title="Wan 2.2 Vertical I2V") as demo:
    gr.Markdown("# 🎬 Wan 2.2 Image → Video Generator")
    gr.Markdown("Generate **10-second Vertical (9:16) AI Videos**")

    with gr.Row():
        # LEFT SIDE (INPUTS)
        with gr.Column(scale=1):
            input_image = gr.Image(
                type="pil",
                label="Upload Image"
            )
            prompt = gr.Textbox(
                label="Prompt",
                placeholder="Describe motion, camera movement..."
            )
            negative_prompt = gr.Textbox(
                label="Negative Prompt",
                value="blurry, low quality, distorted, static"
            )
            generate_btn = gr.Button("Generate Video", variant="primary")

        # RIGHT SIDE (OUTPUT)
        with gr.Column(scale=1):
            output_video = gr.Video(
                label="Generated Video"
            )

    generate_btn.click(
        generate_video,
        inputs=[input_image, prompt, negative_prompt],
        outputs=output_video
    )

demo.launch(server_name="0.0.0.0", server_port=7860)