File size: 3,014 Bytes
1d7f1a4
221df0b
1d7f1a4
 
bc73034
2f27afc
1d7f1a4
bc73034
eeaecb8
bc73034
2f27afc
bc73034
 
eeaecb8
2f27afc
 
 
 
 
76374a0
2f27afc
 
239f04a
76374a0
eeaecb8
2f27afc
eeaecb8
 
239f04a
1d7f1a4
76374a0
 
 
 
1d7f1a4
76374a0
 
1d7f1a4
76374a0
 
 
 
 
2f27afc
 
 
1d7f1a4
239f04a
fc67fc0
eeaecb8
 
 
 
2f27afc
eeaecb8
2f27afc
eeaecb8
 
 
 
 
 
 
239f04a
 
 
eeaecb8
 
 
2f27afc
eeaecb8
 
 
 
 
2f27afc
eeaecb8
 
 
2f27afc
eeaecb8
2f27afc
 
eeaecb8
 
 
2f27afc
 
eeaecb8
2f27afc
 
 
 
eeaecb8
2f27afc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eeaecb8
 
 
 
 
 
1d7f1a4
2f27afc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import tempfile

import spaces
import gradio as gr
import torch
import numpy as np
from diffusers import WanImageToVideoPipeline
from diffusers.utils import export_to_video

# Hugging Face model repo for the Wan 2.2 image-to-video pipeline.
model_id = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"

print(f"Using video Model: {model_id}")

# bfloat16 halves memory vs float32; falls back to CPU when no CUDA device
# is present (inference will be extremely slow on CPU — TODO confirm that
# CPU fallback is actually intended rather than an error).
dtype = torch.bfloat16
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load pipeline (downloads weights on first run, then uses the local cache).
pipe = WanImageToVideoPipeline.from_pretrained(
    model_id,
    torch_dtype=dtype
)

pipe.to(device)
print(f"Model Loaded in {device}")
# Tiled VAE decoding trades speed for a much smaller peak-VRAM footprint
# when decoding the generated frames.
pipe.vae.enable_tiling()

# ================================
# Image Preparation
# ================================

def prepare_vertical_image(pipe, image, base_width=384, base_height=672):
    """Snap the target resolution to the pipeline's spatial grid and resize.

    Both dimensions are floored to a multiple of the pipeline's spatial
    stride (VAE spatial scale factor x transformer patch size), which the
    model presumably requires for its latent layout — confirm against the
    Wan pipeline docs.

    Args:
        pipe: Loaded WanImageToVideoPipeline (read for its stride config).
        image: PIL-style image exposing ``resize((w, h))``.
        base_width / base_height: Requested portrait resolution (9:16 default).

    Returns:
        Tuple of (resized image, final width, final height).
    """
    stride = pipe.vae_scale_factor_spatial * pipe.transformer.config.patch_size[1]

    width = base_width - base_width % stride
    height = base_height - base_height % stride

    return image.resize((width, height)), width, height


# ================================
# Video Generation
# ================================

@spaces.GPU(size="xlarge", duration=180)
def generate_video(input_image, prompt, negative_prompt, progress=gr.Progress(track_tqdm=True)):
    """Generate a ~10 s vertical (9:16) video from a still image.

    Args:
        input_image: PIL image to animate; ``None`` short-circuits to ``None``
            so an empty Gradio submit doesn't crash.
        prompt: Text describing the desired motion.
        negative_prompt: Text describing artifacts to avoid.
        progress: Gradio progress tracker (mirrors the pipeline's tqdm bars).

    Returns:
        Path to the rendered MP4, or ``None`` when no image was supplied.
    """
    if input_image is None:
        return None

    image, width, height = prepare_vertical_image(pipe, input_image)

    print(f"Generating vertical video {width}x{height}")

    video_frames = pipe(
        image=image,
        prompt=prompt,
        negative_prompt=negative_prompt,
        height=height,
        width=width,
        num_frames=161,  # 161 frames at 16 fps ~= 10 seconds of video
        guidance_scale=5.0,
        num_inference_steps=15
    ).frames[0]

    # Write to a unique temp file per request: a fixed filename would let
    # concurrent Gradio sessions clobber each other's output.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
        output_path = tmp.name

    export_to_video(video_frames, output_path, fps=16)

    return output_path


# ================================
# Gradio UI
# ================================

# Build the web UI. Note: component creation order inside these context
# managers *is* the page layout — do not reorder casually.
with gr.Blocks(title="Wan 2.2 Vertical I2V") as demo:

    gr.Markdown("# 🎬 Wan 2.2 Image → Video Generator")
    gr.Markdown("Generate **10-second Vertical (9:16) AI Videos**")

    with gr.Row():

        # LEFT SIDE (INPUTS)
        with gr.Column(scale=1):

            # Source still image; type="pil" so generate_video receives a
            # PIL image it can resize directly.
            input_image = gr.Image(
                type="pil",
                label="Upload Image"
            )

            prompt = gr.Textbox(
                label="Prompt",
                placeholder="Describe motion, camera movement..."
            )

            # Pre-filled with common failure modes to steer away from.
            negative_prompt = gr.Textbox(
                label="Negative Prompt",
                value="blurry, low quality, distorted, static"
            )

            generate_btn = gr.Button("Generate Video", variant="primary")

        # RIGHT SIDE (OUTPUT)
        with gr.Column(scale=1):

            output_video = gr.Video(
                label="Generated Video"
            )

    # Wire the button to the generation function; the returned file path is
    # rendered by the Video component.
    generate_btn.click(
        generate_video,
        inputs=[input_image, prompt, negative_prompt],
        outputs=output_video
    )

# 0.0.0.0 binds all interfaces (expected for containerized/Spaces hosting).
demo.launch(server_name="0.0.0.0", server_port=7860)