| | import os |
| | import spaces |
| | import torch |
| | from diffusers.pipelines.wan.pipeline_wan_i2v import WanImageToVideoPipeline |
| | from diffusers.models.transformers.transformer_wan import WanTransformer3DModel |
| | from diffusers.utils.export_utils import export_to_video |
| | import gradio as gr |
| | import tempfile |
| | import numpy as np |
| | from PIL import Image |
| | import random |
| |
|
| | |
| | |
| | |
| |
|
# --- Model / credentials -----------------------------------------------------
# Checkpoint passed to from_pretrained(); override via the MODEL_ID env var.
MODEL_ID = os.environ.get("MODEL_ID", "Wan-AI/Wan2.1-I2V-14B-480P-Diffusers")
# Optional Hugging Face token; None when the HF_TOKEN env var is unset.
HF_TOKEN = os.getenv("HF_TOKEN")

# --- Output resolution policy (consumed by resize_image) ----------------------
MAX_DIM = 832      # upper bound for either output side; also the long-side target
MIN_DIM = 480      # lower bound for either output side
SQUARE_DIM = 640   # side length used when the input is exactly square
MULTIPLE_OF = 16   # both output sides are snapped to a multiple of this

# --- Seeding -------------------------------------------------------------------
# Largest seed offered by the UI slider and by random seed selection.
MAX_SEED = np.iinfo(np.int32).max

# --- Timing / frame budget (consumed by get_num_frames and the UI) -------------
FIXED_FPS = 24          # used for duration -> frame conversion and video export
MIN_FRAMES_MODEL = 8    # lower clamp applied to the rounded frame request
MAX_FRAMES_MODEL = 81   # upper clamp applied to the rounded frame request
MIN_DURATION = 0.5      # duration slider minimum, in seconds
MAX_DURATION = 3.0      # duration slider maximum, in seconds
| |
|
| | |
| | |
| | |
| |
|
# Instantiate the image-to-video pipeline once, at import time, so every
# request handled by generate_video() reuses the same loaded weights.
print("Loading pipeline...")
pipe = WanImageToVideoPipeline.from_pretrained(
    MODEL_ID,
    token=HF_TOKEN,  # None when no HF_TOKEN is configured
    torch_dtype=torch.bfloat16,
)
| |
|
| | |
| | |
| | |
| |
|
# Pre-filled text for the "Prompt" textbox.
default_prompt_i2v = (
    "Generate a video with smooth and natural movement. "
    "Objects should have visible motion while maintaining fluid transitions."
)
# Pre-filled text for the "Negative Prompt" textbox and the default value of
# generate_video()'s negative_prompt parameter.
default_negative_prompt = (
    "low quality, worst quality, blurry, distorted, deformed, ugly, "
    "bad anatomy, static, frozen, overall gray"
)
| |
|
| | |
| | |
| | |
| |
|
def resize_image(image: Image.Image) -> Image.Image:
    """Fit *image* into the resolution envelope defined by the module constants.

    Steps:
      1. An exactly square input is resized straight to
         ``SQUARE_DIM x SQUARE_DIM``.
      2. Otherwise, an input whose aspect ratio lies outside
         ``[MIN_DIM / MAX_DIM, MAX_DIM / MIN_DIM]`` is centre-cropped along its
         long axis until it sits on the nearest limit.
      3. The longer side (the height when both sides are equal after cropping)
         is scaled to ``MAX_DIM``; the other side follows the aspect ratio.
      4. Both sides are rounded to the nearest multiple of ``MULTIPLE_OF`` and
         clamped to ``[MIN_DIM, MAX_DIM]``.

    Args:
        image: Source PIL image.

    Returns:
        A new PIL image, resampled with LANCZOS, at the computed size.
    """
    src_w, src_h = image.size

    # Exact squares bypass the aspect-ratio logic entirely.
    if src_w == src_h:
        return image.resize((SQUARE_DIM, SQUARE_DIM), Image.LANCZOS)

    widest_ratio = MAX_DIM / MIN_DIM
    tallest_ratio = MIN_DIM / MAX_DIM
    ratio = src_w / src_h

    if ratio > widest_ratio:
        # Too wide: keep the full height and trim the left/right edges evenly.
        keep_w = int(round(src_h * widest_ratio))
        x0 = (src_w - keep_w) // 2
        working = image.crop((x0, 0, x0 + keep_w, src_h))
    elif ratio < tallest_ratio:
        # Too tall: keep the full width and trim the top/bottom edges evenly.
        keep_h = int(round(src_w / tallest_ratio))
        y0 = (src_h - keep_h) // 2
        working = image.crop((0, y0, src_w, y0 + keep_h))
    else:
        working = image

    work_w, work_h = working.size
    work_ratio = work_w / work_h

    # Pin the dominant side to MAX_DIM and derive the other from the ratio.
    if work_w > work_h:
        scaled_w = MAX_DIM
        scaled_h = int(round(scaled_w / work_ratio))
    else:
        scaled_h = MAX_DIM
        scaled_w = int(round(scaled_h * work_ratio))

    def _snap(side):
        # Round to the MULTIPLE_OF grid, then clamp into [MIN_DIM, MAX_DIM].
        on_grid = round(side / MULTIPLE_OF) * MULTIPLE_OF
        return max(MIN_DIM, min(MAX_DIM, on_grid))

    return working.resize((_snap(scaled_w), _snap(scaled_h)), Image.LANCZOS)
| |
|
| | |
| | |
| | |
| |
|
def get_num_frames(duration_seconds: float) -> int:
    """Convert a clip duration in seconds into the frame count to request.

    The duration is multiplied by ``FIXED_FPS`` and rounded, the result is
    clamped, and one frame is added to the clamped value.

    Args:
        duration_seconds: Requested clip length in seconds.

    Returns:
        The number of frames to generate. It is at least
        ``MIN_FRAMES_MODEL + 1`` and never more than ``MAX_FRAMES_MODEL``.
    """
    requested = int(round(duration_seconds * FIXED_FPS))
    # One frame is added after clamping, so the clamp's ceiling has to be
    # MAX_FRAMES_MODEL - 1. Clamping to MAX_FRAMES_MODEL itself would let the
    # returned count reach MAX_FRAMES_MODEL + 1 for long durations.
    clamped = int(np.clip(requested, MIN_FRAMES_MODEL, MAX_FRAMES_MODEL - 1))
    return 1 + clamped
| |
|
| | |
| | |
| | |
| |
|
@spaces.GPU(duration=300)
def generate_video(
    input_image,
    prompt,
    negative_prompt=default_negative_prompt,
    duration_seconds=2.0,
    steps=6,
    guidance_scale=1.0,
    seed=42,
    randomize_seed=False,
    progress=gr.Progress(track_tqdm=True),
):
    """Generate a video from a single image and write it to a temporary MP4.

    Args:
        input_image: PIL image to animate; ``None`` is rejected.
        prompt: Text prompt forwarded to the pipeline.
        negative_prompt: Negative text prompt forwarded to the pipeline.
        duration_seconds: Clip length; converted with ``get_num_frames``.
        steps: Number of inference steps (cast to ``int``).
        guidance_scale: Guidance scale (cast to ``float``).
        seed: Seed used when ``randomize_seed`` is false (cast to ``int``).
        randomize_seed: When true, draw a seed in ``[0, MAX_SEED]`` instead.
        progress: Gradio progress tracker; not referenced in the body.

    Returns:
        A ``(video_path, seed_used)`` tuple.

    Raises:
        gr.Error: If no input image was supplied.
    """
    if input_image is None:
        raise gr.Error("Please upload an image.")

    pipe.to("cuda")

    frame_count = get_num_frames(duration_seconds)
    if randomize_seed:
        current_seed = random.randint(0, MAX_SEED)
    else:
        current_seed = int(seed)
    conditioning_image = resize_image(input_image)

    # The generator lives on the same device the pipeline was moved to.
    rng = torch.Generator(device="cuda").manual_seed(current_seed)
    result = pipe(
        image=conditioning_image,
        prompt=prompt,
        negative_prompt=negative_prompt,
        height=conditioning_image.height,
        width=conditioning_image.width,
        num_frames=frame_count,
        guidance_scale=float(guidance_scale),
        num_inference_steps=int(steps),
        generator=rng,
    )
    frames = result.frames[0]

    # Only a unique path is needed here; delete=False keeps the file on disk
    # after the handle is closed so the exporter can write to it.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as handle:
        video_path = handle.name

    export_to_video(frames, video_path, fps=FIXED_FPS)
    return video_path, current_seed
| |
|
| | |
| | |
| | |
| |
|
# Gradio UI: left column holds the inputs, right column shows the result.
with gr.Blocks() as demo:
    # Page-level styling plus the title banner.
    gr.HTML("""
    <style>
    .gradio-container {
    background: linear-gradient(135deg, #fef9f3 0%, #f0e6fa 50%, #e6f0fa 100%) !important;
    }
    footer {display: none !important;}
    </style>
    <div style="text-align: center; margin-bottom: 20px;">
    <h1 style="color: #6b5b7a; font-size: 2.2rem; font-weight: 700; margin-bottom: 0.3rem;">
    NSFW Uncensored "Image to Video"
    </h1>
    <p style="color: #8b7b9b; font-size: 1rem;">Powered by Wan 2.1 Model</p>
    </div>
    """)

    with gr.Row():
        # ---- Inputs -----------------------------------------------------
        with gr.Column(scale=1):
            input_image_component = gr.Image(type="pil", label="Upload Image", height=350)

            prompt_input = gr.Textbox(
                label="Prompt",
                value=default_prompt_i2v,
                placeholder="Describe the motion you want...",
                lines=3,
            )

            duration_seconds_input = gr.Slider(
                label="Duration (seconds)",
                minimum=MIN_DURATION,
                maximum=MAX_DURATION,
                step=0.5,
                value=2.0,
            )

            with gr.Accordion("Advanced Options", open=False):
                negative_prompt_input = gr.Textbox(
                    label="Negative Prompt",
                    value=default_negative_prompt,
                    lines=2,
                )

                steps_slider = gr.Slider(
                    label="Inference Steps", minimum=4, maximum=12, step=1, value=6
                )

                guidance_scale_input = gr.Slider(
                    label="Guidance Scale", minimum=0.0, maximum=5.0, step=0.5, value=1.0
                )

                seed_input = gr.Slider(
                    label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42
                )

                randomize_seed_checkbox = gr.Checkbox(label="Randomize Seed", value=True)

            generate_button = gr.Button("Generate Video", variant="primary")

        # ---- Output -----------------------------------------------------
        with gr.Column(scale=1):
            video_output = gr.Video(label="Generated Video", autoplay=True, height=450)

    # Order must match generate_video's positional parameters.
    ui_inputs = [
        input_image_component,
        prompt_input,
        negative_prompt_input,
        duration_seconds_input,
        steps_slider,
        guidance_scale_input,
        seed_input,
        randomize_seed_checkbox,
    ]

    # The seed actually used is written back to the seed slider.
    generate_button.click(
        fn=generate_video,
        inputs=ui_inputs,
        outputs=[video_output, seed_input],
    )
| |
|
# Start the app only when this file is executed directly, not on import.
if __name__ == "__main__":
    queued_demo = demo.queue()
    queued_demo.launch()
| |
|