File size: 3,014 Bytes
1d7f1a4
221df0b
1d7f1a4
 
bc73034
2f27afc
1d7f1a4
bc73034
eeaecb8
bc73034
2f27afc
bc73034
 
eeaecb8
2f27afc
 
 
 
 
76374a0
2f27afc
 
239f04a
76374a0
eeaecb8
2f27afc
eeaecb8
 
239f04a
1d7f1a4
76374a0
 
 
 
1d7f1a4
76374a0
 
1d7f1a4
76374a0
 
 
 
 
2f27afc
 
 
1d7f1a4
239f04a
fc67fc0
eeaecb8
 
 
 
2f27afc
eeaecb8
2f27afc
eeaecb8
 
 
 
 
 
 
239f04a
 
 
eeaecb8
 
 
2f27afc
eeaecb8
 
 
 
 
2f27afc
eeaecb8
 
 
2f27afc
eeaecb8
2f27afc
 
eeaecb8
 
 
2f27afc
 
eeaecb8
2f27afc
 
 
 
eeaecb8
2f27afc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eeaecb8
 
 
 
 
 
1d7f1a4
2f27afc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import tempfile

import spaces
import gradio as gr
import torch
import numpy as np
from diffusers import WanImageToVideoPipeline
from diffusers.utils import export_to_video

# Hugging Face model repo for the Wan 2.2 image-to-video pipeline.
model_id = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"

print(f"Using video Model: {model_id}")

# bfloat16 halves memory vs float32; falls back to CPU when no CUDA device
# is present (inference will be extremely slow on CPU — TODO confirm that
# CPU fallback is actually intended rather than an error).
dtype = torch.bfloat16
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load pipeline (downloads weights on first run, then uses the local cache).
pipe = WanImageToVideoPipeline.from_pretrained(
    model_id,
    torch_dtype=dtype
)

pipe.to(device)
print(f"Model Loaded in {device}")
# Tiled VAE decoding trades speed for a much smaller peak-VRAM footprint
# when decoding the generated frames.
pipe.vae.enable_tiling()

# ================================
# Image Preparation
# ================================

def prepare_vertical_image(pipe, image, base_width=384, base_height=672):
    """Snap the target resolution to the pipeline's spatial grid and resize.

    Both dimensions are floored to a multiple of the pipeline's spatial
    stride (VAE spatial scale factor x transformer patch size), which the
    model presumably requires for its latent layout — confirm against the
    Wan pipeline docs.

    Args:
        pipe: Loaded WanImageToVideoPipeline (read for its stride config).
        image: PIL-style image exposing ``resize((w, h))``.
        base_width / base_height: Requested portrait resolution (9:16 default).

    Returns:
        Tuple of (resized image, final width, final height).
    """
    stride = pipe.vae_scale_factor_spatial * pipe.transformer.config.patch_size[1]

    width = base_width - base_width % stride
    height = base_height - base_height % stride

    return image.resize((width, height)), width, height


# ================================
# Video Generation
# ================================

@spaces.GPU(size="xlarge", duration=180)
def generate_video(input_image, prompt, negative_prompt, progress=gr.Progress(track_tqdm=True)):
    """Generate a ~10 s vertical (9:16) video from a still image.

    Args:
        input_image: PIL image to animate; ``None`` short-circuits to ``None``
            so an empty Gradio submit doesn't crash.
        prompt: Text describing the desired motion.
        negative_prompt: Text describing artifacts to avoid.
        progress: Gradio progress tracker (mirrors the pipeline's tqdm bars).

    Returns:
        Path to the rendered MP4, or ``None`` when no image was supplied.
    """
    if input_image is None:
        return None

    image, width, height = prepare_vertical_image(pipe, input_image)

    print(f"Generating vertical video {width}x{height}")

    video_frames = pipe(
        image=image,
        prompt=prompt,
        negative_prompt=negative_prompt,
        height=height,
        width=width,
        num_frames=161,  # 161 frames at 16 fps ~= 10 seconds of video
        guidance_scale=5.0,
        num_inference_steps=15
    ).frames[0]

    # Write to a unique temp file per request: a fixed filename would let
    # concurrent Gradio sessions clobber each other's output.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
        output_path = tmp.name

    export_to_video(video_frames, output_path, fps=16)

    return output_path


# ================================
# Gradio UI
# ================================

# Build the web UI. Note: component creation order inside these context
# managers *is* the page layout — do not reorder casually.
with gr.Blocks(title="Wan 2.2 Vertical I2V") as demo:

    gr.Markdown("# 🎬 Wan 2.2 Image → Video Generator")
    gr.Markdown("Generate **10-second Vertical (9:16) AI Videos**")

    with gr.Row():

        # LEFT SIDE (INPUTS)
        with gr.Column(scale=1):

            # Source still image; type="pil" so generate_video receives a
            # PIL image it can resize directly.
            input_image = gr.Image(
                type="pil",
                label="Upload Image"
            )

            prompt = gr.Textbox(
                label="Prompt",
                placeholder="Describe motion, camera movement..."
            )

            # Pre-filled with common failure modes to steer away from.
            negative_prompt = gr.Textbox(
                label="Negative Prompt",
                value="blurry, low quality, distorted, static"
            )

            generate_btn = gr.Button("Generate Video", variant="primary")

        # RIGHT SIDE (OUTPUT)
        with gr.Column(scale=1):

            output_video = gr.Video(
                label="Generated Video"
            )

    # Wire the button to the generation function; the returned file path is
    # rendered by the Video component.
    generate_btn.click(
        generate_video,
        inputs=[input_image, prompt, negative_prompt],
        outputs=output_video
    )

# 0.0.0.0 binds all interfaces (expected for containerized/Spaces hosting).
demo.launch(server_name="0.0.0.0", server_port=7860)