# Wan 2.2 Image-to-Video Gradio app (Hugging Face Space)
import spaces
import gradio as gr
import torch
import numpy as np
from diffusers import WanImageToVideoPipeline
from diffusers.utils import export_to_video

# ================================
# Model setup (runs once at import)
# ================================
model_id = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"
print(f"Using video Model: {model_id}")

# bfloat16 keeps the 14B model's memory footprint manageable; fall back to
# CPU only when no CUDA device is present (generation will be very slow there).
dtype = torch.bfloat16
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load pipeline
pipe = WanImageToVideoPipeline.from_pretrained(
    model_id,
    torch_dtype=dtype,
)
pipe.to(device)
print(f"Model Loaded in {device}")

# Tiled VAE decoding trades a little speed for a large cut in peak VRAM,
# which matters for 161-frame decodes.
pipe.vae.enable_tiling()
# ================================
# Image Preparation
# ================================
def prepare_vertical_image(pipe, image, base_width=384, base_height=672):
    """Resize *image* to a portrait resolution the pipeline can accept.

    Both dimensions are snapped down to the nearest multiple of the
    pipeline's spatial granularity (VAE scale factor x transformer patch
    size), since the model only works on that grid.

    Returns:
        (resized_image, final_width, final_height)
    """
    granularity = (
        pipe.vae_scale_factor_spatial
        * pipe.transformer.config.patch_size[1]
    )

    def snap(value):
        # Round down to the grid; equivalent to (value // g) * g.
        return value - (value % granularity)

    final_width, final_height = snap(base_width), snap(base_height)
    return image.resize((final_width, final_height)), final_width, final_height
# ================================
# Video Generation
# ================================
# FIX: `spaces` was imported but never used — on a ZeroGPU Space the GPU is
# only allocated for functions decorated with @spaces.GPU. 161 frames at 15
# steps exceeds the 60 s default allocation, so request a longer window.
@spaces.GPU(duration=300)
def generate_video(input_image, prompt, negative_prompt, progress=gr.Progress(track_tqdm=True)):
    """Generate a ~10-second vertical video from a single input image.

    Args:
        input_image: PIL image from the Gradio component, or None.
        prompt: text describing the desired motion / camera movement.
        negative_prompt: text describing artifacts to avoid.
        progress: Gradio progress tracker mirroring the pipeline's tqdm bars.

    Returns:
        Path to the exported MP4 file, or None when no image was provided.
    """
    # Guard clause: nothing to generate without an input image.
    if input_image is None:
        return None

    image, width, height = prepare_vertical_image(pipe, input_image)
    print(f"Generating vertical video {width}x{height}")

    video_frames = pipe(
        image=image,
        prompt=prompt,
        negative_prompt=negative_prompt,
        height=height,
        width=width,
        num_frames=161,  # FIXED: 161 frames @ 16 fps ~= 10 s (Wan needs num_frames % 4 == 1)
        guidance_scale=5.0,
        num_inference_steps=15,
    ).frames[0]

    output_path = "vertical_output.mp4"
    export_to_video(video_frames, output_path, fps=16)
    return output_path
# ================================
# Gradio UI
# ================================
with gr.Blocks(title="Wan 2.2 Vertical I2V") as demo:
    gr.Markdown("# 🎬 Wan 2.2 Image → Video Generator")
    gr.Markdown("Generate **10-second Vertical (9:16) AI Videos**")

    with gr.Row():
        # LEFT SIDE (INPUTS)
        with gr.Column(scale=1):
            input_image = gr.Image(type="pil", label="Upload Image")
            prompt = gr.Textbox(
                label="Prompt",
                placeholder="Describe motion, camera movement...",
            )
            negative_prompt = gr.Textbox(
                label="Negative Prompt",
                value="blurry, low quality, distorted, static",
            )
            generate_btn = gr.Button("Generate Video", variant="primary")

        # RIGHT SIDE (OUTPUT)
        with gr.Column(scale=1):
            output_video = gr.Video(label="Generated Video")

    # Wire the button to the generation function.
    generate_btn.click(
        generate_video,
        inputs=[input_image, prompt, negative_prompt],
        outputs=output_video,
    )

# Bind to all interfaces on the port Hugging Face Spaces expects.
demo.launch(server_name="0.0.0.0", server_port=7860)