Long-video

Runtime error

App Files Files Community

tester343 committed on Dec 15, 2025

Commit

2292603

verified ·

1 Parent(s): b51f7ed

Update app.py

Browse files

Files changed (1) hide show

app.py +83 -64

app.py CHANGED Viewed

@@ -8,30 +8,31 @@ import numpy as np
 import gradio as gr
 from PIL import Image
 # =========================================================
-# 1. CONFIGURATION - USE SMALLER 1.3B MODEL
 # =========================================================
-# The 14B model is too large for ZeroGPU free tier
-MODEL_ID = "Wan-AI/Wan2.1-I2V-14B-480P-Diffusers"  # Or use 1.3B if available
-LORA_REPO = "Kijai/WanVideo_comfy"
-LORA_NAME = "Lightx2v/lightx2v_I2V_480p_bf16.safetensors"
 HF_TOKEN = os.environ.get("HF_TOKEN")
 MAX_DIM = 480
 MIN_DIM = 480
 MULTIPLE_OF = 16
 MAX_SEED = np.iinfo(np.int32).max
 FIXED_FPS = 16
-# Global pipeline holder
-_pipe = None
 # =========================================================
 # 2. HELPER FUNCTIONS
 # =========================================================
 def resize_image(image: Image.Image) -> Image.Image:
-    """Resize image to safe dimensions."""
     width, height = image.size
     aspect = width / height
@@ -42,77 +43,96 @@ def resize_image(image: Image.Image) -> Image.Image:
         w = MIN_DIM
         h = int(w / aspect)
     w = (round(w / MULTIPLE_OF) * MULTIPLE_OF)
     h = (round(h / MULTIPLE_OF) * MULTIPLE_OF)
     w = min(max(w, MIN_DIM), MAX_DIM)
     h = min(max(h, MIN_DIM), MAX_DIM)
     return image.resize((w, h), Image.LANCZOS)
 def cleanup():
     gc.collect()
     if torch.cuda.is_available():
         torch.cuda.empty_cache()
 # =========================================================
-# 3. SIMPLE GENERATION - MINIMAL LOADING
 # =========================================================
-@spaces.GPU(duration=180)
 def generate(
     image_path: str,
     prompt: str,
     duration: float = 3.0,
-    steps: int = 4,
-    guidance: float = 1.0,
     seed: int = 42,
     randomize: bool = True,
     progress=gr.Progress(track_tqdm=True)
 ):
-    """Generate video with minimal overhead."""
-    global _pipe
     if not image_path:
         raise gr.Error("Please upload an image.")
-    try:
-        progress(0.1, desc="Initializing...")
-        # Import inside function
-        from diffusers import AutoPipelineForImage2Video
-        from diffusers.utils import export_to_video
-        # Load pipeline only once
-        if _pipe is None:
-            progress(0.2, desc="Loading model (first run)...")
-            print("⏳ Loading pipeline...")
-            _pipe = AutoPipelineForImage2Video.from_pretrained(
                 MODEL_ID,
                 torch_dtype=torch.bfloat16,
                 token=HF_TOKEN,
             )
-            _pipe.to("cuda")
-            print("✅ Pipeline loaded")
-        # Prepare inputs
-        progress(0.4, desc="Processing...")
         img = Image.open(image_path).convert("RGB")
         img = resize_image(img)
         final_seed = random.randint(0, MAX_SEED) if randomize else int(seed)
-        num_frames = max(8, min(int(duration * FIXED_FPS), 49))
-        print(f"📐 {img.size}, frames={num_frames}, seed={final_seed}")
-        # Generate
-        progress(0.5, desc="Generating video...")
-        cleanup()
         with torch.inference_mode():
-            output = _pipe(
                 image=img,
                 prompt=prompt,
-                negative_prompt="low quality, blur, distortion",
                 height=img.height,
                 width=img.width,
                 num_frames=num_frames,
@@ -123,7 +143,7 @@ def generate(
         frames = output.frames[0]
-        # Save
         progress(0.9, desc="Saving...")
         with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as f:
             video_path = f.name
@@ -131,18 +151,16 @@ def generate(
         export_to_video(frames, video_path, fps=FIXED_FPS)
         cleanup()
-        print(f"✅ Saved: {video_path}")
         return video_path, final_seed
     except Exception as e:
         cleanup()
-        error_msg = str(e)
-        print(f"❌ {error_msg}")
-        if "out of memory" in error_msg.lower():
-            raise gr.Error("Out of memory. Try shorter duration or smaller image.")
-        raise gr.Error(f"Error: {error_msg[:150]}")
 # =========================================================
 # 4. GRADIO UI
@@ -151,48 +169,49 @@ with gr.Blocks() as demo:
     gr.HTML("""
     <div style="text-align:center; padding:20px; background:linear-gradient(135deg,#1e3c72,#2a5298);
                 color:white; border-radius:12px; margin-bottom:20px;">
-        <h1>🎬 Wan Video Generator</h1>
-        <p>Image to Video • Optimized for ZeroGPU</p>
     </div>
     """)
     with gr.Row():
         with gr.Column():
-            img_in = gr.Image(type="filepath", label="📷 Image")
             prompt = gr.Textbox(
                 label="✍️ Prompt",
-                value="Smooth cinematic motion, high quality, natural movement",
                 lines=2
             )
             with gr.Row():
-                duration = gr.Slider(1, 5, value=3, step=0.5, label="Duration (s)")
-                steps = gr.Slider(2, 8, value=4, step=1, label="Steps")
             with gr.Row():
                 seed = gr.Number(value=42, label="Seed", precision=0)
-                randomize = gr.Checkbox(value=True, label="Random")
-            btn = gr.Button("🚀 Generate", variant="primary")
         with gr.Column():
             video_out = gr.Video(label="🎥 Result")
-            seed_out = gr.Number(label="Seed", precision=0)
             gr.HTML("""
-            <div style="background:#e7f3ff; padding:12px; border-radius:8px; margin-top:10px;">
-                <b>💡 Tips:</b><br>
-                • Keep duration short (2-3s) for best results<br>
-                • First generation takes longer (loading model)<br>
-                • If error, wait a moment and retry
             </div>
             """)
     btn.click(
         fn=generate,
-        inputs=[img_in, prompt, duration, steps, gr.Number(value=1.0, visible=False), seed, randomize],
         outputs=[video_out, seed_out]
     )
 if __name__ == "__main__":
-    demo.queue(max_size=1).launch()

 import gradio as gr
 from PIL import Image
+# Use the specific pipeline class for Wan models
+from diffusers import WanImageToVideoPipeline
+from diffusers.utils import export_to_video
 # =========================================================
+# 1. CONFIGURATION
 # =========================================================
+MODEL_ID = "Wan-AI/Wan2.1-I2V-14B-480P-Diffusers"
 HF_TOKEN = os.environ.get("HF_TOKEN")
+# Strict dimensions for the 14B model to prevent crashes
 MAX_DIM = 480
 MIN_DIM = 480
 MULTIPLE_OF = 16
 MAX_SEED = np.iinfo(np.int32).max
 FIXED_FPS = 16
+# Global variable to hold the model in memory between runs
+global_pipe = None
 # =========================================================
 # 2. HELPER FUNCTIONS
 # =========================================================
 def resize_image(image: Image.Image) -> Image.Image:
+    """Resize image to exactly 480p to keep the 14B model happy."""
     width, height = image.size
     aspect = width / height
         w = MIN_DIM
         h = int(w / aspect)
+    # Enforce multiples of 16
     w = (round(w / MULTIPLE_OF) * MULTIPLE_OF)
     h = (round(h / MULTIPLE_OF) * MULTIPLE_OF)
+    # Hard cap
     w = min(max(w, MIN_DIM), MAX_DIM)
     h = min(max(h, MIN_DIM), MAX_DIM)
     return image.resize((w, h), Image.LANCZOS)
 def cleanup():
+    """Force garbage collection to free VRAM."""
     gc.collect()
     if torch.cuda.is_available():
         torch.cuda.empty_cache()
 # =========================================================
+# 3. GENERATION LOGIC
 # =========================================================
+@spaces.GPU(duration=240) # 4 Minute timeout
 def generate(
     image_path: str,
     prompt: str,
     duration: float = 3.0,
+    steps: int = 15, # Increased slightly for quality
+    guidance: float = 5.0,
     seed: int = 42,
     randomize: bool = True,
     progress=gr.Progress(track_tqdm=True)
 ):
+    global global_pipe
     if not image_path:
         raise gr.Error("Please upload an image.")
+    # 1. LOAD MODEL (Lazy Loading)
+    # We only load it once. If it's already loaded, we skip this.
+    if global_pipe is None:
+        print("⏳ Loading Wan 14B Pipeline... (This happens only once)")
+        progress(0.1, desc="Loading Model (One-time setup)...")
+        try:
+            # Load in bfloat16 to save memory
+            global_pipe = WanImageToVideoPipeline.from_pretrained(
                 MODEL_ID,
                 torch_dtype=torch.bfloat16,
                 token=HF_TOKEN,
             )
+            # CRITICAL OPTIMIZATION FOR ZERO GPU:
+            # 1. CPU Offload: Moves layers to CPU when not in use. Essential for 14B.
+            global_pipe.enable_model_cpu_offload()
+            # 2. VAE Tiling: Prevents VRAM explosion during decoding.
+            global_pipe.enable_vae_tiling()
+            print("✅ Model loaded and optimized.")
+        except Exception as e:
+            print(f"❌ Load Error: {e}")
+            raise gr.Error(f"Failed to load model: {e}")
+    # 2. PROCESS INPUT
+    try:
+        progress(0.3, desc="Processing Image...")
+        cleanup()
         img = Image.open(image_path).convert("RGB")
         img = resize_image(img)
         final_seed = random.randint(0, MAX_SEED) if randomize else int(seed)
+        # Wan generally produces 16fps.
+        # 5 seconds = 81 frames usually.
+        # NOTE: no frame cap is applied here; the frame count is bounded only by the UI duration slider.
+        num_frames = int(duration * FIXED_FPS)
+        # Round up so that (num_frames - 1) is divisible by 4, i.e. num_frames has the form 4k + 1.
+        if (num_frames - 1) % 4 != 0:
+            num_frames += (4 - ((num_frames - 1) % 4))
+        print(f"🎬 Generating: {img.size} | Frames: {num_frames} | Seed: {final_seed}")
+        # 3. RUN INFERENCE
+        progress(0.4, desc="Dreaming...")
         with torch.inference_mode():
+            output = global_pipe(
                 image=img,
                 prompt=prompt,
+                negative_prompt="low quality, blur, distortion, morphing, jitter, artifacts",
                 height=img.height,
                 width=img.width,
                 num_frames=num_frames,
         frames = output.frames[0]
+        # 4. SAVE VIDEO
         progress(0.9, desc="Saving...")
         with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as f:
             video_path = f.name
         export_to_video(frames, video_path, fps=FIXED_FPS)
         cleanup()
+        print(f"✅ Video saved: {video_path}")
         return video_path, final_seed
     except Exception as e:
         cleanup()
+        print(f"❌ Error: {e}")
+        # Detect memory errors
+        if "out of memory" in str(e).lower():
+            raise gr.Error("GPU Out of Memory. Try a shorter duration.")
+        raise gr.Error(f"Generation Error: {str(e)[:200]}")
 # =========================================================
 # 4. GRADIO UI
     gr.HTML("""
     <div style="text-align:center; padding:20px; background:linear-gradient(135deg,#1e3c72,#2a5298);
                 color:white; border-radius:12px; margin-bottom:20px;">
+        <h1>🎬 Wan 14B Video Generator</h1>
+        <p>Image to Video • Optimized for ZeroGPU • 14B Parameters</p>
     </div>
     """)
     with gr.Row():
         with gr.Column():
+            img_in = gr.Image(type="filepath", label="📷 Input Image")
             prompt = gr.Textbox(
                 label="✍️ Prompt",
+                value="Cinematic slow motion, high quality, natural movement, 4k",
                 lines=2
             )
             with gr.Row():
+                # Limited duration for safety on free tier
+                duration = gr.Slider(2, 5, value=4, step=1, label="Duration (seconds)")
+                steps = gr.Slider(10, 30, value=15, step=1, label="Quality Steps")
             with gr.Row():
                 seed = gr.Number(value=42, label="Seed", precision=0)
+                randomize = gr.Checkbox(value=True, label="Randomize Seed")
+            btn = gr.Button("🚀 Generate Video", variant="primary")
         with gr.Column():
             video_out = gr.Video(label="🎥 Result")
+            seed_out = gr.Number(label="Used Seed", precision=0)
             gr.HTML("""
+            <div style="background:#f0f0f0; padding:12px; border-radius:8px; margin-top:10px; color:#333;">
+                <b>💡 Notes:</b><br>
+                • <b>First Run:</b> Takes ~60s to load the model.<br>
+                • <b>Subsequent Runs:</b> Much faster.<br>
+                • <b>Limit:</b> Max 5 seconds recommended to avoid crashes.
             </div>
             """)
     btn.click(
         fn=generate,
+        inputs=[img_in, prompt, duration, steps, gr.Number(value=5.0, visible=False), seed, randomize],
         outputs=[video_out, seed_out]
     )
 if __name__ == "__main__":
+    demo.queue().launch()