wannsfw

Paused

App Files Files Community

samsammysammy commited on Feb 3

Commit

dcd7d2e

verified ·

1 Parent(s): e050b88

Update app.py

Browse files

Files changed (1) hide show

app.py +146 -208

app.py CHANGED Viewed

@@ -1,47 +1,91 @@
 # IMPORTANT: spaces must be imported first to avoid CUDA initialization issues
 import spaces
-# Standard library imports
 import os
-# Third-party imports (non-CUDA)
 import numpy as np
 from PIL import Image
 import gradio as gr
-# CUDA-related imports (must come after spaces)
 import torch
 from diffusers import WanPipeline, AutoencoderKLWan
 from diffusers.utils import export_to_video
-# Model configuration
 MODEL_ID = "Wan-AI/Wan2.2-TI2V-5B-Diffusers"
 dtype = torch.bfloat16
 device = "cuda" if torch.cuda.is_available() else "cpu"
-# Global pipeline variable
 pipe = None
 def initialize_pipeline():
-    """Initialize the Wan2.2 pipeline"""
-    global pipe
-    if pipe is None:
-        print("Loading Wan2.2-TI2V-5B model...")
-        vae = AutoencoderKLWan.from_pretrained(
-            MODEL_ID,
-            subfolder="vae",
-            torch_dtype=torch.float32
-        )
-        pipe = WanPipeline.from_pretrained(
-            MODEL_ID,
-            vae=vae,
-            torch_dtype=dtype
-        )
-        pipe.to(device)
-        print("Model loaded successfully!")
     return pipe
-@spaces.GPU(duration=180)  # Allocate GPU for 3 minutes (max allowed for Pro)
 def generate_video(
     prompt: str,
     image: Image.Image = None,
@@ -50,31 +94,41 @@ def generate_video(
     num_frames: int = 73,
     num_inference_steps: int = 35,
     guidance_scale: float = 5.0,
-    seed: int = -1
 ):
-    """
-    Generate video from text prompt and optional image
-    Args:
-        prompt: Text description of the video to generate
-        image: Optional input image for image-to-video generation
-        width: Video width (default: 1280)
-        height: Video height (default: 704)
-        num_frames: Number of frames to generate (default: 73 for 3 seconds at 24fps)
-        num_inference_steps: Number of denoising steps (default: 35 for faster generation)
-        guidance_scale: Guidance scale for generation (default: 5.0)
-        seed: Random seed for reproducibility (-1 for random)
-    """
     try:
-        # Initialize pipeline
         pipeline = initialize_pipeline()
-        # Set seed for reproducibility
         if seed == -1:
             seed = torch.randint(0, 2**32 - 1, (1,)).item()
         generator = torch.Generator(device=device).manual_seed(seed)
-        # Prepare generation parameters
         gen_params = {
             "prompt": prompt,
             "height": height,
@@ -85,212 +139,96 @@ def generate_video(
             "generator": generator,
         }
-        # Add image if provided (for image-to-video)
         if image is not None:
             gen_params["image"] = image
-        # Generate video
-        print(f"Generating video with prompt: {prompt}")
-        print(f"Parameters: {width}x{height}, {num_frames} frames, seed: {seed}")
         output = pipeline(**gen_params).frames[0]
-        # Export to video file
         output_path = "output.mp4"
         export_to_video(output, output_path, fps=24)
-        return output_path, f"Video generated successfully! Seed used: {seed}"
-    except Exception as e:
-        error_msg = f"Error generating video: {str(e)}"
-        print(error_msg)
-        return None, error_msg
-# Create Gradio interface
-with gr.Blocks(title="Wan2.2 Video Generation") as demo:
-    gr.Markdown(
-        """
-        # Wan2.2 Video Generation
-        Generate high-quality videos from text prompts or images using Wan2.2-TI2V-5B model.
-        This model supports both **Text-to-Video** and **Image-to-Video** generation at 720P/24fps.
-        **Note:** Generation takes 2-3 minutes. Settings are optimized for Zero GPU 3-minute limit.
-        """
-    )
     with gr.Row():
         with gr.Column():
-            # Input controls
             prompt_input = gr.Textbox(
-                label="Prompt",
-                placeholder="Describe the video you want to generate...",
-                lines=3,
                 value="Two anthropomorphic cats in comfy boxing gear fight on stage"
             )
-            image_input = gr.Image(
-                label="Input Image (Optional - for Image-to-Video)",
-                type="pil",
-                sources=["upload"]
-            )
             with gr.Accordion("Advanced Settings", open=False):
                 with gr.Row():
-                    width_input = gr.Slider(
-                        label="Width",
-                        minimum=512,
-                        maximum=1920,
-                        step=64,
-                        value=1280
-                    )
-                    height_input = gr.Slider(
-                        label="Height",
-                        minimum=512,
-                        maximum=1080,
-                        step=64,
-                        value=704
-                    )
-                num_frames_input = gr.Slider(
-                    label="Number of Frames (more frames = longer video)",
-                    minimum=25,
-                    maximum=145,
-                    step=24,
-                    value=73,
-                    info="73 frames ≈ 3 seconds at 24fps (optimized for Zero GPU limits)"
-                )
-                num_steps_input = gr.Slider(
-                    label="Inference Steps (more steps = better quality, slower)",
-                    minimum=20,
-                    maximum=60,
-                    step=5,
-                    value=35
-                )
-                guidance_scale_input = gr.Slider(
-                    label="Guidance Scale (higher = closer to prompt)",
-                    minimum=1.0,
-                    maximum=15.0,
-                    step=0.5,
-                    value=5.0
-                )
-                seed_input = gr.Number(
-                    label="Seed (-1 for random)",
-                    value=-1,
-                    precision=0
                 )
             generate_btn = gr.Button("Generate Video", variant="primary", size="lg")
         with gr.Column():
-            # Output
-            video_output = gr.Video(
-                label="Generated Video",
-                autoplay=True
-            )
-            status_output = gr.Textbox(
-                label="Status",
-                lines=2
-            )
-    # Examples
     gr.Examples(
         examples=[
-            [
-                "Two anthropomorphic cats in comfy boxing gear fight on stage",
-                None,
-                1280,
-                704,
-                73,
-                35,
-                5.0,
-                42
-            ],
-            [
-                "A serene underwater scene with colorful coral reefs and tropical fish swimming gracefully",
-                None,
-                1280,
-                704,
-                73,
-                35,
-                5.0,
-                123
-            ],
-            [
-                "A bustling futuristic city at night with neon lights and flying cars",
-                None,
-                1280,
-                704,
-                73,
-                35,
-                5.0,
-                456
-            ],
-            [
-                "A peaceful mountain landscape with snow-capped peaks and a flowing river",
-                None,
-                1280,
-                704,
-                73,
-                35,
-                5.0,
-                789
-            ],
-        ],
-        inputs=[
-            prompt_input,
-            image_input,
-            width_input,
-            height_input,
-            num_frames_input,
-            num_steps_input,
-            guidance_scale_input,
-            seed_input
         ],
         outputs=[video_output, status_output],
         fn=generate_video,
         cache_examples=False,
     )
-    # Connect generate button
     generate_btn.click(
-        fn=generate_video,
-        inputs=[
-            prompt_input,
-            image_input,
-            width_input,
-            height_input,
-            num_frames_input,
-            num_steps_input,
-            guidance_scale_input,
-            seed_input
-        ],
         outputs=[video_output, status_output]
     )
-    gr.Markdown(
-        """
-        ## Tips for Best Results:
-        - Use detailed, descriptive prompts
-        - For image-to-video: Upload a clear image that matches your prompt
-        - Higher inference steps = better quality but slower generation
-        - Adjust guidance scale to balance creativity vs. prompt adherence
-        - Use the same seed to reproduce results
-        - Keep generation under 3 minutes to fit Zero GPU limits
-        ## Model Information:
-        - Model: Wan2.2-TI2V-5B (5B parameters)
-        - Resolution: 720P (1280x704 or custom)
-        - Frame Rate: 24 fps
-        - Default Duration: 3 seconds (optimized for Zero GPU)
-        - Generation Time: ~2-3 minutes on Zero GPU (with optimized settings)
-        """
-    )
-# Launch the app
 if __name__ == "__main__":
-    demo.queue(max_size=20)
-    demo.launch()

 # IMPORTANT: spaces must be imported first to avoid CUDA initialization issues
 import spaces
 import os
 import numpy as np
 from PIL import Image
 import gradio as gr
 import torch
 from diffusers import WanPipeline, AutoencoderKLWan
 from diffusers.utils import export_to_video
+# ────────────────────────────────────────────────
+#  Model + LoRA configuration
+# ────────────────────────────────────────────────
 MODEL_ID = "Wan-AI/Wan2.2-TI2V-5B-Diffusers"
 dtype = torch.bfloat16
 device = "cuda" if torch.cuda.is_available() else "cpu"
+AVAILABLE_LORAS = [
+    {
+        "name": "Lightning (Fast 4-step)",
+        "repo_id": "lightx2v/Wan2.2-Distill-Loras",
+        "filename": "wan2.2_i2v_A14b_high_noise_lora_rank64_lightx2v_4step_1022.safetensors",
+        "default_strength": 1.0,
+    },
+    {
+        "name": "General NSFW",
+        "repo_id": "lopi999/Wan2.2-I2V_General-NSFW-LoRA",
+        "filename": "pytorch_lora_weights.safetensors",
+        "default_strength": 0.8,
+    },
+    # Add more LoRAs here — they will be pre-loaded automatically
+]
+# Global pipeline + pre-loaded adapter info
 pipe = None
+lora_adapters = {}          # name → {"adapter_name": str, "strength": float}
 def initialize_pipeline():
+    global pipe, lora_adapters
+    if pipe is not None:
+        return pipe
+    print("Loading Wan2.2-TI2V-5B base model...")
+    vae = AutoencoderKLWan.from_pretrained(
+        MODEL_ID,
+        subfolder="vae",
+        torch_dtype=torch.float32
+    )
+    pipe = WanPipeline.from_pretrained(
+        MODEL_ID,
+        vae=vae,
+        torch_dtype=dtype
+    )
+    pipe.to(device)
+    print("Base model loaded.")
+    # ── Pre-load ALL available LoRAs once ───────────────
+    print("Pre-loading LoRAs...")
+    for lora in AVAILABLE_LORAS:
+        name = lora["name"]
+        try:
+            print(f"  → {name}")
+            pipe.load_lora_weights(
+                lora["repo_id"],
+                weight_name=lora["filename"],
+                adapter_name=name,           # unique identifier
+            )
+            # Store for later hot-swapping
+            lora_adapters[name] = {
+                "adapter_name": name,
+                "strength": lora["default_strength"]
+            }
+        except Exception as e:
+            print(f"    Failed to load {name}: {e}")
+    # Fuse once → best inference performance
+    if lora_adapters:
+        pipe.fuse_lora()
+        print("All LoRAs fused.")
+    print("Pipeline fully initialized.")
     return pipe
+@spaces.GPU(duration=180)
 def generate_video(
     prompt: str,
     image: Image.Image = None,
     num_frames: int = 73,
     num_inference_steps: int = 35,
     guidance_scale: float = 5.0,
+    seed: int = -1,
+    enabled_loras: list = None,
+    lora_strength_multiplier: float = 1.0
 ):
     try:
         pipeline = initialize_pipeline()
+        # ── Hot-swap / enable only selected LoRAs ───────
+        active_adapters = []
+        active_strengths = []
+        enabled = enabled_loras or []
+        for lora_name in enabled:
+            if lora_name in lora_adapters:
+                strength = lora_adapters[lora_name]["strength"] * lora_strength_multiplier
+                active_adapters.append(lora_name)
+                active_strengths.append(strength)
+        if active_adapters:
+            pipeline.set_adapters(active_adapters, adapter_strengths=active_strengths)
+            print(f"Activated LoRAs: {', '.join(active_adapters)}")
+        else:
+            pipeline.disable_lora()   # important: turn off if none selected
+        # Lightning auto-optimization
+        if "Lightning (Fast 4-step)" in enabled and num_inference_steps > 8:
+            num_inference_steps = 4
+            print("Lightning LoRA → reduced to 4 steps")
+        # Seed
         if seed == -1:
             seed = torch.randint(0, 2**32 - 1, (1,)).item()
         generator = torch.Generator(device=device).manual_seed(seed)
         gen_params = {
             "prompt": prompt,
             "height": height,
             "generator": generator,
         }
         if image is not None:
             gen_params["image"] = image
+        print(f"Generating: {width}x{height}, {num_frames} frames, steps={num_inference_steps}")
         output = pipeline(**gen_params).frames[0]
         output_path = "output.mp4"
         export_to_video(output, output_path, fps=24)
+        status = f"Done! Seed: {seed}"
+        if active_adapters:
+            status += f"\nLoRAs: {', '.join(active_adapters)} @ {lora_strength_multiplier:.2f}x"
+        return output_path, status
+    except Exception as e:
+        msg = f"Error: {str(e)}"
+        print(msg)
+        return None, msg
+# ────────────────────────────────────────────────
+#               Gradio UI
+# ────────────────────────────────────────────────
+with gr.Blocks(title="Wan2.2 Video + Fast LoRA") as demo:
+    gr.Markdown("""
+    # Wan2.2-TI2V-5B Video Generation
+    **Optimized LoRA loading** — all LoRAs pre-loaded at startup, then hot-swapped instantly.
+    """)
     with gr.Row():
         with gr.Column():
             prompt_input = gr.Textbox(
+                label="Prompt", lines=3,
                 value="Two anthropomorphic cats in comfy boxing gear fight on stage"
             )
+            image_input = gr.Image(label="Input Image (optional)", type="pil", sources=["upload"])
             with gr.Accordion("Advanced Settings", open=False):
                 with gr.Row():
+                    width_input  = gr.Slider(512, 1920, step=64, value=1280, label="Width")
+                    height_input = gr.Slider(512, 1080, step=64, value=704, label="Height")
+                num_frames_input = gr.Slider(25, 145, step=24, value=73, label="Frames")
+                num_steps_input  = gr.Slider(4, 60, step=1, value=35, label="Inference Steps",
+                                             info="Lightning LoRA → try 4–8 steps")
+                guidance_scale_input = gr.Slider(1.0, 15.0, 0.5, value=5.0, label="Guidance Scale")
+                seed_input = gr.Number(label="Seed (-1 = random)", value=-1, precision=0)
+            with gr.Accordion("LoRA Controls", open=True):
+                lora_checkbox = gr.CheckboxGroup(
+                    choices=[l["name"] for l in AVAILABLE_LORAS],
+                    label="Enable LoRAs",
+                    value=[]
                 )
+                lora_strength = gr.Slider(0.1, 1.5, 0.05, value=1.0,
+                                          label="Global Strength Multiplier")
             generate_btn = gr.Button("Generate Video", variant="primary", size="lg")
         with gr.Column():
+            video_output = gr.Video(label="Generated Video", autoplay=True)
+            status_output = gr.Textbox(label="Status", lines=3)
+    # Examples with LoRA usage
     gr.Examples(
         examples=[
+            ["Two anthropomorphic cats in comfy boxing gear fight on stage", None, 1280, 704, 73, 35, 5.0, 42, [], 1.0],
+            ["A serene underwater scene with colorful coral reefs...", None, 1280, 704, 73, 4, 5.0, 123, ["Lightning (Fast 4-step)"], 1.0],
+            ["Explicit adult scene, detailed", None, 1280, 704, 73, 30, 6.0, 999, ["General NSFW"], 0.9],
         ],
+        inputs=[prompt_input, image_input, width_input, height_input, num_frames_input,
+                num_steps_input, guidance_scale_input, seed_input, lora_checkbox, lora_strength],
         outputs=[video_output, status_output],
         fn=generate_video,
         cache_examples=False,
     )
     generate_btn.click(
+        generate_video,
+        inputs=[prompt_input, image_input, width_input, height_input, num_frames_input,
+                num_steps_input, guidance_scale_input, seed_input, lora_checkbox, lora_strength],
         outputs=[video_output, status_output]
     )
+    gr.Markdown("""
+    ## Performance Notes
+    - LoRAs are **pre-loaded once** → first generation may take ~10–30s longer, later ones are fast.
+    - Lightning LoRA: use **4–8 steps** → generation can finish in <60s.
+    - Add new LoRAs by appending to `AVAILABLE_LORAS` — they auto-load at startup.
+    """)
 if __name__ == "__main__":
+    demo.queue(max_size=20).launch()