import spaces
import gradio as gr
import torch
import numpy as np
from diffusers import WanImageToVideoPipeline
from diffusers.utils import export_to_video
# Wan 2.2 image-to-video checkpoint in Diffusers format.
model_id = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"
print(f"Using video Model: {model_id}")
# bfloat16 halves memory vs fp32; presumably fine on the target GPU — confirm on CPU fallback.
dtype = torch.bfloat16
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Load pipeline
pipe = WanImageToVideoPipeline.from_pretrained(
    model_id,
    torch_dtype=dtype
)
pipe.to(device)
print(f"Model Loaded in {device}")
# Tiled VAE decode trades speed for a lower peak-memory footprint on long videos.
pipe.vae.enable_tiling()
# ================================
# Image Preparation
# ================================
def prepare_vertical_image(pipe, image, base_width=384, base_height=672):
    """Resize ``image`` to model-compatible vertical (9:16-ish) dimensions.

    The pipeline requires height and width divisible by the spatial
    granularity ``vae_scale_factor_spatial * patch_size[1]`` (patch_size is
    presumably (t, h, w) — index 1 is the spatial patch; TODO confirm).

    Args:
        pipe: loaded pipeline exposing ``vae_scale_factor_spatial`` and
            ``transformer.config.patch_size``.
        image: PIL-like image with a ``resize((w, h))`` method.
        base_width / base_height: target size; rounded DOWN to the nearest
            multiple of the granularity.

    Returns:
        (resized_image, final_width, final_height)
    """
    mod_value = (
        pipe.vae_scale_factor_spatial *
        pipe.transformer.config.patch_size[1]
    )
    # Floor to the granularity, but never below one unit: base dimensions
    # smaller than mod_value would otherwise floor to 0 and produce an
    # invalid 0-sized resize.
    final_width = max(mod_value, (base_width // mod_value) * mod_value)
    final_height = max(mod_value, (base_height // mod_value) * mod_value)
    resized_image = image.resize((final_width, final_height))
    return resized_image, final_width, final_height
# ================================
# Video Generation
# ================================
@spaces.GPU(size="xlarge", duration=180)
def generate_video(
    input_image,
    prompt,
    negative_prompt,
    num_frames=161,
    guidance_scale=5.0,
    num_inference_steps=15,
    progress=gr.Progress(track_tqdm=True),
):
    """Generate a vertical video from a still image and write it to disk.

    Args:
        input_image: PIL image from the Gradio input, or None.
        prompt / negative_prompt: text conditioning for the pipeline.
        num_frames: frame count; default 161 ≈ 10 s at fps=16 (presumably
            a 4k+1 count required by the model — TODO confirm).
        guidance_scale / num_inference_steps: sampling knobs, previously
            hard-coded; defaults preserve the original behavior.
        progress: Gradio progress tracker (mirrors the pipeline's tqdm).

    Returns:
        Path of the exported mp4, or None when no image was supplied.
    """
    # Nothing to do without a source image.
    if input_image is None:
        return None
    # Snap the input to model-compatible vertical dimensions.
    image, width, height = prepare_vertical_image(pipe, input_image)
    print(f"Generating vertical video {width}x{height}")
    video_frames = pipe(
        image=image,
        prompt=prompt,
        negative_prompt=negative_prompt,
        height=height,
        width=width,
        num_frames=num_frames,
        guidance_scale=guidance_scale,
        num_inference_steps=num_inference_steps,
    ).frames[0]
    output_path = "vertical_output.mp4"
    export_to_video(video_frames, output_path, fps=16)
    return output_path
# ================================
# Gradio UI
# ================================
# Build and launch the two-column Gradio interface.
with gr.Blocks(title="Wan 2.2 Vertical I2V") as demo:
    gr.Markdown("# 🎬 Wan 2.2 Image → Video Generator")
    gr.Markdown("Generate **10-second Vertical (9:16) AI Videos**")

    with gr.Row():
        # Left column: all inputs.
        with gr.Column(scale=1):
            src_image = gr.Image(type="pil", label="Upload Image")
            motion_prompt = gr.Textbox(
                label="Prompt",
                placeholder="Describe motion, camera movement...",
            )
            neg_prompt = gr.Textbox(
                label="Negative Prompt",
                value="blurry, low quality, distorted, static",
            )
            run_button = gr.Button("Generate Video", variant="primary")

        # Right column: the rendered result.
        with gr.Column(scale=1):
            video_out = gr.Video(label="Generated Video")

    # Wire the button to the generation function.
    run_button.click(
        generate_video,
        inputs=[src_image, motion_prompt, neg_prompt],
        outputs=video_out,
    )

demo.launch(server_name="0.0.0.0", server_port=7860)