jiuface committed
Commit eec59d0 · verified · Parent: d1b9b5b

Update app.py

Files changed (1)
  1. app.py +30 -33
app.py CHANGED
```diff
@@ -1,18 +1,14 @@
 import spaces
 import torch
-from diffusers import AutoencoderKLWan, UniPCMultistepScheduler
 from diffusers.pipelines.wan.pipeline_wan_i2v import WanImageToVideoPipeline
 from diffusers.models.transformers.transformer_wan import WanTransformer3DModel
 from diffusers.utils import export_to_video
-from transformers import CLIPVisionModel
 import gradio as gr
 import tempfile
-from huggingface_hub import hf_hub_download
 import numpy as np
 from PIL import Image
 import random
 from datetime import datetime
-from huggingface_hub import login
 import os
 import time
 from PIL import Image
@@ -46,18 +42,6 @@ MAX_FRAMES_MODEL = 80
 MIN_DURATION = round(MIN_FRAMES_MODEL/FIXED_FPS,1)
 MAX_DURATION = round(MAX_FRAMES_MODEL/FIXED_FPS,1)
 
-MOD_VALUE = 32
-DEFAULT_H_SLIDER_VALUE = 512
-DEFAULT_W_SLIDER_VALUE = 896
-NEW_FORMULA_MAX_AREA = 480.0 * 832.0
-
-SLIDER_MIN_H, SLIDER_MAX_H = 128, 896
-SLIDER_MIN_W, SLIDER_MAX_W = 128, 896
-MAX_SEED = np.iinfo(np.int32).max
-
-#MIN_FRAMES_MODEL = 8
-#MAX_FRAMES_MODEL = 81
-
 
 pipe = WanImageToVideoPipeline.from_pretrained(MODEL_ID,
                                                transformer=WanTransformer3DModel.from_pretrained('cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
```
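The hunk above cuts off mid-call, so the rest of the pipeline construction is not visible. For reference, a typical way to complete such a load looks like the sketch below; the `subfolder` and `torch_dtype` arguments and the `MODEL_ID` value are assumptions, not taken from this commit.

```python
# Sketch only: everything past the first transformer argument is assumed,
# since the diff truncates the call.
import torch
from diffusers.pipelines.wan.pipeline_wan_i2v import WanImageToVideoPipeline
from diffusers.models.transformers.transformer_wan import WanTransformer3DModel

MODEL_ID = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"  # assumed; not visible in this diff

transformer = WanTransformer3DModel.from_pretrained(
    "cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers",
    subfolder="transformer",      # assumption: standard Diffusers repo layout
    torch_dtype=torch.bfloat16,   # assumption: matches the bf16 checkpoint name
)
pipe = WanImageToVideoPipeline.from_pretrained(
    MODEL_ID,
    transformer=transformer,
    torch_dtype=torch.bfloat16,
).to("cuda")
```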
```diff
@@ -197,20 +181,32 @@ def upload_video_to_r2(video_file, account_id, access_key, secret_key, bucket_name
 
     return video_remote_path
 
-def handle_image_upload_for_dims_wan(uploaded_pil_image, current_h_val, current_w_val):
-    if uploaded_pil_image is None:
-        return gr.update(value=DEFAULT_H_SLIDER_VALUE), gr.update(value=DEFAULT_W_SLIDER_VALUE)
-    try:
-        new_h, new_w = _calculate_new_dimensions_wan(
-            uploaded_pil_image, MOD_VALUE, NEW_FORMULA_MAX_AREA,
-            SLIDER_MIN_H, SLIDER_MAX_H, SLIDER_MIN_W, SLIDER_MAX_W,
-            DEFAULT_H_SLIDER_VALUE, DEFAULT_W_SLIDER_VALUE
-        )
-        return gr.update(value=new_h), gr.update(value=new_w)
-    except Exception as e:
-        gr.Warning("Error attempting to calculate new dimensions")
-        return gr.update(value=DEFAULT_H_SLIDER_VALUE), gr.update(value=DEFAULT_W_SLIDER_VALUE)
-
+def get_duration(
+    image_url,
+    prompt,
+    height,
+    width,
+    negative_prompt,
+    duration_seconds,
+    guidance_scale,
+    steps,
+    seed,
+    randomize_seed,
+    upload_to_r2,
+    account_id,
+    access_key,
+    secret_key,
+    bucket,
+    progress
+):
+    BASE_FRAMES_HEIGHT_WIDTH = 81 * 832 * 624
+    BASE_STEP_DURATION = 15
+    input_image = load_image(image_url)
+    width, height = resize_image(input_image).size
+    frames = get_num_frames(duration_seconds)
+    factor = frames * width * height / BASE_FRAMES_HEIGHT_WIDTH
+    step_duration = BASE_STEP_DURATION * factor ** 1.5
+    return 10 + int(steps) * step_duration
 
 
 @spaces.GPU(duration=120)
```
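The new `get_duration` estimates runtime from the workload: it scales a 15-second base step cost by the ratio of (frames × width × height) to a reference load of 81 frames at 832×624, raised to the 1.5 power, then adds 10 seconds of overhead. A rough worked example follows, with a hypothetical stand-in for the `get_num_frames` helper the function references (its body is not part of this commit):

```python
import numpy as np

FIXED_FPS = 16          # assumption: the constant is not visible in this diff
MIN_FRAMES_MODEL = 8    # assumption: taken from the old commented-out value
MAX_FRAMES_MODEL = 80   # shown in the second hunk header above

# Hypothetical sketch of get_num_frames: clamp the requested duration to the
# model's supported frame range at the fixed frame rate.
def get_num_frames(duration_seconds):
    return int(np.clip(round(duration_seconds * FIXED_FPS),
                       MIN_FRAMES_MODEL, MAX_FRAMES_MODEL))

# Same arithmetic as get_duration: at the reference load (81 frames, 832x624)
# the factor is exactly 1, so 4 steps estimate 10 + 4 * 15 = 70 seconds.
frames, width, height, steps = 81, 832, 624, 4
factor = frames * width * height / (81 * 832 * 624)
print(10 + steps * 15 * factor ** 1.5)  # -> 70.0
```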
```diff
@@ -251,6 +247,7 @@ def generate_video(image_url,
         width=resized_image.width,
         num_frames=num_frames,
         guidance_scale=float(guidance_scale),
+        guidance_scale_2=float(guidance_scale),
         num_inference_steps=int(steps),
         generator=torch.Generator(device="cuda").manual_seed(current_seed)
     ).frames[0]
```
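The added `guidance_scale_2` reflects Wan 2.2 A14B's two-expert design: in the Diffusers pipeline, `guidance_scale` drives the first (high-noise) transformer and `guidance_scale_2` the second (low-noise) one, and this commit simply reuses the single UI value for both. If the two scales should ever diverge, the call would look like the sketch below; the split values and the `image`/`prompt` keywords are illustrative, not from the commit.

```python
# Illustrative only: same call as in the hunk above, but with the two Wan 2.2
# experts given different guidance values. Variables such as resized_image,
# num_frames, steps, and current_seed come from generate_video in the commit.
output = pipe(
    image=resized_image,
    prompt=prompt,
    negative_prompt=negative_prompt,
    height=resized_image.height,
    width=resized_image.width,
    num_frames=num_frames,
    guidance_scale=1.0,     # high-noise expert (illustrative value)
    guidance_scale_2=3.5,   # low-noise expert (illustrative value)
    num_inference_steps=int(steps),
    generator=torch.Generator(device="cuda").manual_seed(current_seed),
).frames[0]
```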
```diff
@@ -279,15 +276,15 @@ with gr.Blocks() as demo:
             container=False
         )
         prompt_input = gr.Textbox(label="Prompt", value=default_prompt_i2v)
-        duration_seconds_input = gr.Slider(minimum=round(MIN_FRAMES_MODEL/FIXED_FPS,1), maximum=round(MAX_FRAMES_MODEL/FIXED_FPS,1), step=0.1, value=2, label="Duration (seconds)", info=f"Clamped to model's {MIN_FRAMES_MODEL}-{MAX_FRAMES_MODEL} frames at {FIXED_FPS}fps.")
+        duration_seconds_input = gr.Slider(minimum=MIN_DURATION, maximum=MAX_DURATION, step=0.1, value=3.5, label="Duration (seconds)", info=f"Clamped to model's {MIN_FRAMES_MODEL}-{MAX_FRAMES_MODEL} frames at {FIXED_FPS}fps.")
 
         with gr.Accordion("Advanced Settings", open=False):
             negative_prompt_input = gr.Textbox(label="Negative Prompt", value=default_negative_prompt, lines=3)
             seed_input = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42, interactive=True)
             randomize_seed_checkbox = gr.Checkbox(label="Randomize seed", value=True, interactive=True)
             with gr.Row():
-                height_input = gr.Slider(minimum=SLIDER_MIN_H, maximum=SLIDER_MAX_H, step=MOD_VALUE, value=DEFAULT_H_SLIDER_VALUE, label=f"Output Height (multiple of {MOD_VALUE})")
-                width_input = gr.Slider(minimum=SLIDER_MIN_W, maximum=SLIDER_MAX_W, step=MOD_VALUE, value=DEFAULT_W_SLIDER_VALUE, label=f"Output Width (multiple of {MOD_VALUE})")
+                height_input = gr.Slider(minimum=512, maximum=1024, step=1, value=640, label=f"Output Height")
+                width_input = gr.Slider(minimum=512, maximum=1024, step=1, value=540, label=f"Output Width")
                 steps_slider = gr.Slider(minimum=1, maximum=30, step=1, value=4, label="Inference Steps")
                 guidance_scale_input = gr.Slider(minimum=0.0, maximum=20.0, step=0.5, value=1.0, label="Guidance Scale", visible=True)
 
```
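Note that the decorator in the third hunk still reads `@spaces.GPU(duration=120)`, yet `get_duration` takes exactly the same parameters as the generation function. That signature match is the pattern ZeroGPU uses for dynamic quotas, where `duration` accepts a callable evaluated per request with the wrapped function's arguments. A sketch of that wiring, assuming it is (or will be) applied; the `progress` default is a common Gradio convention, not shown in the diff:

```python
import gradio as gr
import spaces

# Sketch: spaces.GPU also accepts a callable for duration; ZeroGPU invokes it
# with the same arguments as the decorated function and expects a number of
# seconds back. Whether this commit's get_duration is hooked up this way is
# an assumption; the visible context line still hardcodes duration=120.
@spaces.GPU(duration=get_duration)
def generate_video(image_url, prompt, height, width, negative_prompt,
                   duration_seconds, guidance_scale, steps, seed,
                   randomize_seed, upload_to_r2, account_id, access_key,
                   secret_key, bucket, progress=gr.Progress(track_tqdm=True)):
    ...
```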
 
 
290