Commit ·
0171411
1
Parent(s): a472df1
Optimize settings: 6 steps default, better resize logic, 24fps
Browse files
app.py
CHANGED
|
@@ -1,8 +1,9 @@
|
|
| 1 |
import os
|
| 2 |
import spaces
|
| 3 |
import torch
|
| 4 |
-
from diffusers import WanImageToVideoPipeline
|
| 5 |
-
from diffusers.
|
|
|
|
| 6 |
import gradio as gr
|
| 7 |
import tempfile
|
| 8 |
import numpy as np
|
|
@@ -13,18 +14,18 @@ import random
|
|
| 13 |
# MODEL CONFIGURATION
|
| 14 |
# =========================================================
|
| 15 |
|
| 16 |
-
MODEL_ID = "Wan-AI/Wan2.1-I2V-14B-480P-Diffusers"
|
| 17 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
| 18 |
MAX_DIM = 832
|
| 19 |
MIN_DIM = 480
|
| 20 |
SQUARE_DIM = 640
|
| 21 |
MULTIPLE_OF = 16
|
| 22 |
MAX_SEED = np.iinfo(np.int32).max
|
| 23 |
-
FIXED_FPS =
|
| 24 |
MIN_FRAMES_MODEL = 8
|
| 25 |
-
MAX_FRAMES_MODEL =
|
| 26 |
MIN_DURATION = 0.5
|
| 27 |
-
MAX_DURATION =
|
| 28 |
|
| 29 |
# =========================================================
|
| 30 |
# LOAD PIPELINE
|
|
@@ -42,7 +43,7 @@ pipe = WanImageToVideoPipeline.from_pretrained(
|
|
| 42 |
# =========================================================
|
| 43 |
|
| 44 |
default_prompt_i2v = "Generate a video with smooth and natural movement. Objects should have visible motion while maintaining fluid transitions."
|
| 45 |
-
default_negative_prompt = "low quality, worst quality, blurry, distorted, deformed, ugly, bad anatomy, static, frozen"
|
| 46 |
|
| 47 |
# =========================================================
|
| 48 |
# IMAGE RESIZING LOGIC
|
|
@@ -50,31 +51,46 @@ default_negative_prompt = "low quality, worst quality, blurry, distorted, deform
|
|
| 50 |
|
| 51 |
def resize_image(image: Image.Image) -> Image.Image:
|
| 52 |
width, height = image.size
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
target_w = MAX_DIM
|
| 57 |
-
target_h =
|
| 58 |
-
|
| 59 |
-
target_w = MIN_DIM
|
| 60 |
target_h = MAX_DIM
|
| 61 |
-
|
| 62 |
-
target_w = SQUARE_DIM
|
| 63 |
-
target_h = SQUARE_DIM
|
| 64 |
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
|
|
|
| 68 |
|
| 69 |
-
return
|
| 70 |
|
| 71 |
# =========================================================
|
| 72 |
# UTILITY FUNCTIONS
|
| 73 |
# =========================================================
|
| 74 |
|
| 75 |
def get_num_frames(duration_seconds: float):
|
| 76 |
-
|
| 77 |
-
return max(MIN_FRAMES_MODEL, min(MAX_FRAMES_MODEL, frames))
|
| 78 |
|
| 79 |
# =========================================================
|
| 80 |
# MAIN GENERATION FUNCTION
|
|
@@ -85,8 +101,8 @@ def generate_video(
|
|
| 85 |
input_image,
|
| 86 |
prompt,
|
| 87 |
negative_prompt=default_negative_prompt,
|
| 88 |
-
duration_seconds=
|
| 89 |
-
steps=
|
| 90 |
guidance_scale=1.0,
|
| 91 |
seed=42,
|
| 92 |
randomize_seed=False,
|
|
@@ -158,7 +174,7 @@ with gr.Blocks() as demo:
|
|
| 158 |
minimum=MIN_DURATION,
|
| 159 |
maximum=MAX_DURATION,
|
| 160 |
step=0.5,
|
| 161 |
-
value=
|
| 162 |
label="Duration (seconds)"
|
| 163 |
)
|
| 164 |
|
|
@@ -173,7 +189,7 @@ with gr.Blocks() as demo:
|
|
| 173 |
minimum=4,
|
| 174 |
maximum=12,
|
| 175 |
step=1,
|
| 176 |
-
value=
|
| 177 |
label="Inference Steps"
|
| 178 |
)
|
| 179 |
|
|
|
|
| 1 |
import os
|
| 2 |
import spaces
|
| 3 |
import torch
|
| 4 |
+
from diffusers.pipelines.wan.pipeline_wan_i2v import WanImageToVideoPipeline
|
| 5 |
+
from diffusers.models.transformers.transformer_wan import WanTransformer3DModel
|
| 6 |
+
from diffusers.utils.export_utils import export_to_video
|
| 7 |
import gradio as gr
|
| 8 |
import tempfile
|
| 9 |
import numpy as np
|
|
|
|
| 14 |
# MODEL CONFIGURATION
|
| 15 |
# =========================================================
|
| 16 |
|
| 17 |
+
# Model + environment configuration.
MODEL_ID = os.getenv("MODEL_ID", "Wan-AI/Wan2.1-I2V-14B-480P-Diffusers")  # overridable via env var
HF_TOKEN = os.environ.get("HF_TOKEN")  # optional Hugging Face token (None if unset)

# Output resolution constraints, in pixels.
MAX_DIM = 832        # longest allowed side
MIN_DIM = 480        # shortest allowed side
SQUARE_DIM = 640     # fixed size used for exactly-square inputs
MULTIPLE_OF = 16     # each output side is snapped to a multiple of this

MAX_SEED = np.iinfo(np.int32).max  # upper bound when sampling a random seed

# Video timing constraints.
FIXED_FPS = 24           # output frame rate
MIN_FRAMES_MODEL = 8     # minimum frame count fed to the model
MAX_FRAMES_MODEL = 81    # maximum frame count fed to the model
MIN_DURATION = 0.5       # seconds; lower bound of the duration slider
MAX_DURATION = 3.0       # seconds; upper bound of the duration slider
|
| 29 |
|
| 30 |
# =========================================================
|
| 31 |
# LOAD PIPELINE
|
|
|
|
| 43 |
# =========================================================
|
| 44 |
|
| 45 |
# Default UI prompt: nudges the model toward visible, fluid motion.
default_prompt_i2v = "Generate a video with smooth and natural movement. Objects should have visible motion while maintaining fluid transitions."
# Default negative prompt: steers generation away from common artifacts and frozen frames.
default_negative_prompt = "low quality, worst quality, blurry, distorted, deformed, ugly, bad anatomy, static, frozen, overall gray"
|
| 47 |
|
| 48 |
# =========================================================
|
| 49 |
# IMAGE RESIZING LOGIC
|
|
|
|
| 51 |
|
| 52 |
def resize_image(image: Image.Image) -> Image.Image:
    """Fit an input image to model-friendly dimensions.

    Square inputs map directly to SQUARE_DIM x SQUARE_DIM. Otherwise the
    image is center-cropped so its aspect ratio lies within
    [MIN_DIM/MAX_DIM, MAX_DIM/MIN_DIM], scaled so its longer side equals
    MAX_DIM, snapped to the nearest multiple of MULTIPLE_OF per side, and
    finally clamped to [MIN_DIM, MAX_DIM] per side.
    """
    w, h = image.size
    if w == h:
        return image.resize((SQUARE_DIM, SQUARE_DIM), Image.LANCZOS)

    ratio = w / h
    max_ratio = MAX_DIM / MIN_DIM
    min_ratio = MIN_DIM / MAX_DIM

    cropped = image
    if ratio > max_ratio:
        # Too wide: center-crop horizontally down to the widest allowed ratio.
        keep_w = int(round(h * max_ratio))
        x0 = (w - keep_w) // 2
        cropped = image.crop((x0, 0, x0 + keep_w, h))
    elif ratio < min_ratio:
        # Too tall: center-crop vertically down to the tallest allowed ratio.
        keep_h = int(round(w / min_ratio))
        y0 = (h - keep_h) // 2
        cropped = image.crop((0, y0, w, y0 + keep_h))

    cw, ch = cropped.size
    aspect = cw / ch

    # Scale so the longer side hits MAX_DIM while keeping the aspect ratio.
    if cw > ch:
        tw = MAX_DIM
        th = int(round(tw / aspect))
    else:
        th = MAX_DIM
        tw = int(round(th * aspect))

    # Snap each side to the nearest multiple of MULTIPLE_OF, then clamp
    # into the model's supported range (may slightly distort aspect).
    fw = max(MIN_DIM, min(MAX_DIM, round(tw / MULTIPLE_OF) * MULTIPLE_OF))
    fh = max(MIN_DIM, min(MAX_DIM, round(th / MULTIPLE_OF) * MULTIPLE_OF))

    return cropped.resize((fw, fh), Image.LANCZOS)
|
| 87 |
|
| 88 |
# =========================================================
|
| 89 |
# UTILITY FUNCTIONS
|
| 90 |
# =========================================================
|
| 91 |
|
| 92 |
def get_num_frames(duration_seconds: float) -> int:
    """Convert a requested clip duration into a frame count for the model.

    The duration is rounded to whole frames at FIXED_FPS and clamped into
    [MIN_FRAMES_MODEL, MAX_FRAMES_MODEL]; one extra frame is then added
    (presumably the initial conditioning frame — matches the original +1;
    confirm against the pipeline's frame-count convention).

    Bug fix: the previous version added the extra frame *after* clamping,
    so a maxed-out duration could return MAX_FRAMES_MODEL + 1 frames,
    exceeding the model limit. The result is now re-capped; behavior for
    all non-saturating inputs is unchanged.
    """
    raw_frames = int(round(duration_seconds * FIXED_FPS))
    clamped = max(MIN_FRAMES_MODEL, min(MAX_FRAMES_MODEL, raw_frames))
    # Re-cap after the +1 so the model's maximum is never exceeded.
    return min(MAX_FRAMES_MODEL, clamped + 1)
|
|
|
|
| 94 |
|
| 95 |
# =========================================================
|
| 96 |
# MAIN GENERATION FUNCTION
|
|
|
|
| 101 |
input_image,
|
| 102 |
prompt,
|
| 103 |
negative_prompt=default_negative_prompt,
|
| 104 |
+
duration_seconds=2.0,
|
| 105 |
+
steps=6,
|
| 106 |
guidance_scale=1.0,
|
| 107 |
seed=42,
|
| 108 |
randomize_seed=False,
|
|
|
|
| 174 |
minimum=MIN_DURATION,
|
| 175 |
maximum=MAX_DURATION,
|
| 176 |
step=0.5,
|
| 177 |
+
value=2.0,
|
| 178 |
label="Duration (seconds)"
|
| 179 |
)
|
| 180 |
|
|
|
|
| 189 |
minimum=4,
|
| 190 |
maximum=12,
|
| 191 |
step=1,
|
| 192 |
+
value=6,
|
| 193 |
label="Inference Steps"
|
| 194 |
)
|
| 195 |
|