Spaces:

mvp-lab
/

70113_ImgGen_Diffusion_ControlNetxLoRA

Sleeping

App Files Files Community

oliveryanzuolu committed on Feb 2

Commit

6580922

verified ·

1 Parent(s): 65663ad

Update app.py

Browse files

Files changed (1) hide show

app.py +48 -89

app.py CHANGED Viewed

@@ -4,13 +4,11 @@ import spaces
 import cv2
 import numpy as np
 from PIL import Image
-import os
-# Diffusers and ControlNet imports
 from diffusers import StableDiffusionXLControlNetPipeline, ControlNetModel, AutoencoderKL, UniPCMultistepScheduler
 # -----------------------------------------------------------------------------
-# 1. Configuration & Registry (SDXL Version)
 # -----------------------------------------------------------------------------
 LORA_REGISTRY = {
     "None (Base SDXL)": {
@@ -39,50 +37,35 @@ LORA_REGISTRY = {
 }
 # -----------------------------------------------------------------------------
-# 2. Model Initialization
 # -----------------------------------------------------------------------------
-print("Initializing SDXL Inference Pipeline...")
-# On ZeroGPU, we initialize standard variables, but we rely on the decorator for device placement
-device = "cuda" if torch.cuda.is_available() else "cpu"
-dtype = torch.float16
-# 1. Load VAE (Critical for SDXL fp16 stability to avoid NaNs)
 vae = AutoencoderKL.from_pretrained(
     "madebyollin/sdxl-vae-fp16-fix",
-    torch_dtype=dtype
 )
-# 2. Load ControlNet (Must be SDXL version)
 controlnet = ControlNetModel.from_pretrained(
     "diffusers/controlnet-canny-sdxl-1.0",
-    torch_dtype=dtype,
     use_safetensors=True
 )
-# 3. Load Base SDXL
 pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
     "stabilityai/stable-diffusion-xl-base-1.0",
     controlnet=controlnet,
     vae=vae,
-    torch_dtype=dtype,
     use_safetensors=True
 )
-# Optimization
 pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
-# For ZeroGPU/Spaces, enable_model_cpu_offload is the standard way to handle SDXL
-# This registers hooks that automatically move layers to GPU when the @spaces.GPU function is called
-try:
-    pipe.enable_model_cpu_offload()
-except Exception as e:
-    print(f"Offload warning: {e}")
-print("SDXL Pipeline Loaded Successfully.")
 # -----------------------------------------------------------------------------
-# 3. Computer Vision Helper Functions
 # -----------------------------------------------------------------------------
 def get_canny_image(image, low_threshold=100, high_threshold=200):
@@ -93,7 +76,7 @@ def get_canny_image(image, low_threshold=100, high_threshold=200):
     return Image.fromarray(canny_edges)
 # -----------------------------------------------------------------------------
-# 4. Inference Logic
 # -----------------------------------------------------------------------------
 @spaces.GPU(duration=120)
@@ -107,14 +90,14 @@ def generate_controlled_image(
     seed
 ):
     if input_image is None:
-        raise gr.Error("Validation Error: Please upload an image first!")
-    # 1. Preprocess Image (SDXL works best at 1024x1024)
     width, height = 1024, 1024
     input_image = input_image.resize((width, height))
     canny_image = get_canny_image(input_image)
-    # 2. Configuration
     style_config = LORA_REGISTRY[lora_selection]
     repo_id = style_config["repo"]
     trigger_text = style_config["trigger"]
@@ -123,29 +106,24 @@ def generate_controlled_image(
     final_prompt = f"{trigger_text}{prompt}"
-    # 3. LoRA & Generation Block
-    # We use a try/finally block to ensure LoRA is ALWAYS unloaded,
-    # preventing state corruption on the shared GPU.
-    try:
-        # A. Load LoRA
-        if repo_id:
             print(f"Loading LoRA: {repo_id}")
-            # Ensure we are in a clean state before loading
-            pipe.unload_lora_weights()
             if lora_file:
                 pipe.load_lora_weights(repo_id, weight_name=lora_file)
             else:
                 pipe.load_lora_weights(repo_id)
-            pipe.fuse_lora(lora_scale=lora_weight)
-            print("LoRA fused successfully.")
-        # B. Generate
-        generator = torch.Generator("cuda").manual_seed(int(seed))
-        print(f"Generating with Prompt: {final_prompt}")
-        output_image = pipe(
             prompt=final_prompt,
             negative_prompt=negative_prompt,
             image=canny_image,
@@ -153,43 +131,33 @@ def generate_controlled_image(
             controlnet_conditioning_scale=float(controlnet_conditioning_scale),
             guidance_scale=7.0,
             generator=generator,
-        ).images
     except Exception as e:
         raise e
-    finally:
-        # C. Cleanup (Always run this)
-        if repo_id:
-            print("Cleaning up LoRA weights...")
-            try:
-                pipe.unfuse_lora()
-                pipe.unload_lora_weights()
-            except Exception as cleanup_error:
-                print(f"Cleanup warning: {cleanup_error}")
-        # Explicit cache clearing for ZeroGPU shared environment
-        torch.cuda.empty_cache()
     return canny_image, output_image
 # -----------------------------------------------------------------------------
-# 5. Gradio UI Architecture
 # -----------------------------------------------------------------------------
 css = """
-#col-container {max_width: 1200px; margin-left: auto; margin-right: auto;}
 .guide-text {font-size: 1.1em; color: #4a5568;}
 """
-# Example Data
 examples = [
     [
         "https://huggingface.co/takuma104/controlnet_dev/resolve/main/gen_compare/control_images/converted/control_bird_canny.png",
         "a colorful exotic bird sitting on a branch, detailed feathers, masterpiece, 8k",
         "blurry, low quality, deformed, illustration",
         "None (Base SDXL)",
-        1.0, 30, 42
     ],
     [
         "https://huggingface.co/takuma104/controlnet_dev/resolve/main/gen_compare/control_images/converted/control_vermeer_depth.png",
@@ -203,14 +171,14 @@ examples = [
         "pixel art, a cute bird, isometric view, retro game asset, 8-bit graphics",
         "photorealistic, vector, high resolution, smooth, 3d render",
         "Pixel Art XL",
-        1.0, 30, 202
     ],
     [
         "https://huggingface.co/takuma104/controlnet_dev/resolve/main/gen_compare/control_images/converted/control_room_mlsd.png",
-        "made-of-clay, claymation style, interior of a modern living room, stop motion animation, plasticine texture, fingerprint textures",
         "cgi, 3d render, glossy, architectural visualization",
         "Claymation Style XL",
-        1.0, 30, 303
     ],
 ]
@@ -222,65 +190,56 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
             """
             <p class='guide-text'>
             <b>SDXL Edition.</b><br>
-            Higher resolution, better prompt adherence, and native LoRA support.
-            Uses <b>ControlNet Canny (SDXL)</b> for structure.
             </p>
             """
         )
         with gr.Row():
-            # Left Column: Inputs
             with gr.Column(scale=1):
-                input_image = gr.Image(label="Input Image (Structure)", type="pil", sources=["upload", "clipboard"])
                 prompt = gr.Textbox(
                     label="Prompt",
                     value="A house on a hill, sunny day, masterpiece",
-                    placeholder="Describe the content...",
                     lines=2
                 )
                 negative_prompt = gr.Textbox(
                     label="Negative Prompt",
-                    value="blurry, low quality, distorted, ugly, bad anatomy, watermark, text",
                     lines=1
                 )
                 lora_selection = gr.Dropdown(
-                    label="Select LoRA Style",
                     choices=list(LORA_REGISTRY.keys()),
-                    value="None (Base SDXL)",
-                    info="Automatically injects trigger words and loads weights."
                 )
-                with gr.Accordion("⚙️ Advanced Settings", open=False):
                     controlnet_conditioning_scale = gr.Slider(
                         label="ControlNet Strength",
-                        minimum=0.0, maximum=1.5, value=0.8, step=0.1,
-                        info="SDXL ControlNet is strong. 0.8 is usually a good sweet spot."
                     )
-                    steps = gr.Slider(label="Inference Steps", minimum=10, maximum=50, value=30, step=1)
                     seed = gr.Number(label="Seed", value=42, precision=0)
-                submit_btn = gr.Button("Generate Art", variant="primary", size="lg")
-            # Right Column: Outputs
             with gr.Column(scale=1):
                 with gr.Row():
-                    output_canny = gr.Image(label="Detected Edges", type="pil")
-                    output_result = gr.Image(label="Final Stylized Image", type="pil")
-        # Examples Section
-        gr.Markdown("### 🔍 Try These Examples")
         gr.Examples(
             examples=examples,
             inputs=[input_image, prompt, negative_prompt, lora_selection, controlnet_conditioning_scale, steps, seed],
             outputs=[output_canny, output_result],
             fn=generate_controlled_image,
-            cache_examples=False # Must be False for ZeroGPU async compatibility
         )
-    # Event Wiring
     submit_btn.click(
         fn=generate_controlled_image,
         inputs=[

 import cv2
 import numpy as np
 from PIL import Image
 from diffusers import StableDiffusionXLControlNetPipeline, ControlNetModel, AutoencoderKL, UniPCMultistepScheduler
 # -----------------------------------------------------------------------------
+# Configuration & Registry
 # -----------------------------------------------------------------------------
 LORA_REGISTRY = {
     "None (Base SDXL)": {
 }
 # -----------------------------------------------------------------------------
+# Model Initialization (CPU only, ZeroGPU handles device transfer)
 # -----------------------------------------------------------------------------
+print("Initializing SDXL Pipeline on CPU...")
 vae = AutoencoderKL.from_pretrained(
     "madebyollin/sdxl-vae-fp16-fix",
+    torch_dtype=torch.float16
 )
 controlnet = ControlNetModel.from_pretrained(
     "diffusers/controlnet-canny-sdxl-1.0",
+    torch_dtype=torch.float16,
     use_safetensors=True
 )
 pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
     "stabilityai/stable-diffusion-xl-base-1.0",
     controlnet=controlnet,
     vae=vae,
+    torch_dtype=torch.float16,
     use_safetensors=True
 )
 pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
+print("Pipeline loaded. ZeroGPU will handle device management.")
 # -----------------------------------------------------------------------------
+# Helper Functions
 # -----------------------------------------------------------------------------
 def get_canny_image(image, low_threshold=100, high_threshold=200):
     return Image.fromarray(canny_edges)
 # -----------------------------------------------------------------------------
+# Inference Logic
 # -----------------------------------------------------------------------------
 @spaces.GPU(duration=120)
     seed
 ):
     if input_image is None:
+        raise gr.Error("Please upload an image first!")
     width, height = 1024, 1024
     input_image = input_image.resize((width, height))
     canny_image = get_canny_image(input_image)
+    pipe.unload_lora_weights()
     style_config = LORA_REGISTRY[lora_selection]
     repo_id = style_config["repo"]
     trigger_text = style_config["trigger"]
     final_prompt = f"{trigger_text}{prompt}"
+    if repo_id:
+        try:
             print(f"Loading LoRA: {repo_id}")
             if lora_file:
                 pipe.load_lora_weights(repo_id, weight_name=lora_file)
             else:
                 pipe.load_lora_weights(repo_id)
+            print("LoRA loaded successfully.")
+        except Exception as e:
+            print(f"LoRA Load Error: {e}")
+            gr.Warning(f"Failed to load LoRA. Using base model.")
+    generator = torch.Generator("cuda").manual_seed(int(seed))
+    print(f"Generating: {final_prompt[:100]}...")
+    try:
+        output = pipe(
             prompt=final_prompt,
             negative_prompt=negative_prompt,
             image=canny_image,
             controlnet_conditioning_scale=float(controlnet_conditioning_scale),
             guidance_scale=7.0,
             generator=generator,
+        )
+        output_image = output.images[0]
     except Exception as e:
+        pipe.unload_lora_weights()
         raise e
+    pipe.unload_lora_weights()
+    torch.cuda.empty_cache()
     return canny_image, output_image
 # -----------------------------------------------------------------------------
+# Gradio UI
 # -----------------------------------------------------------------------------
 css = """
+#col-container {max-width: 1200px; margin-left: auto; margin-right: auto;}
 .guide-text {font-size: 1.1em; color: #4a5568;}
 """
 examples = [
     [
         "https://huggingface.co/takuma104/controlnet_dev/resolve/main/gen_compare/control_images/converted/control_bird_canny.png",
         "a colorful exotic bird sitting on a branch, detailed feathers, masterpiece, 8k",
         "blurry, low quality, deformed, illustration",
         "None (Base SDXL)",
+        0.8, 30, 42
     ],
     [
         "https://huggingface.co/takuma104/controlnet_dev/resolve/main/gen_compare/control_images/converted/control_vermeer_depth.png",
         "pixel art, a cute bird, isometric view, retro game asset, 8-bit graphics",
         "photorealistic, vector, high resolution, smooth, 3d render",
         "Pixel Art XL",
+        0.8, 30, 202
     ],
     [
         "https://huggingface.co/takuma104/controlnet_dev/resolve/main/gen_compare/control_images/converted/control_room_mlsd.png",
+        "made-of-clay, claymation style, interior of a modern living room, stop motion animation, plasticine texture",
         "cgi, 3d render, glossy, architectural visualization",
         "Claymation Style XL",
+        0.8, 30, 303
     ],
 ]
             """
             <p class='guide-text'>
             <b>SDXL Edition.</b><br>
+            Uses ControlNet Canny (SDXL) for structure preservation with LoRA styles.
             </p>
             """
         )
         with gr.Row():
             with gr.Column(scale=1):
+                input_image = gr.Image(label="Input Image", type="pil", sources=["upload", "clipboard"])
                 prompt = gr.Textbox(
                     label="Prompt",
                     value="A house on a hill, sunny day, masterpiece",
                     lines=2
                 )
                 negative_prompt = gr.Textbox(
                     label="Negative Prompt",
+                    value="blurry, low quality, distorted, ugly, watermark",
                     lines=1
                 )
                 lora_selection = gr.Dropdown(
+                    label="LoRA Style",
                     choices=list(LORA_REGISTRY.keys()),
+                    value="None (Base SDXL)"
                 )
+                with gr.Accordion("Advanced Settings", open=False):
                     controlnet_conditioning_scale = gr.Slider(
                         label="ControlNet Strength",
+                        minimum=0.0, maximum=1.5, value=0.8, step=0.1
                     )
+                    steps = gr.Slider(label="Steps", minimum=10, maximum=50, value=30, step=1)
                     seed = gr.Number(label="Seed", value=42, precision=0)
+                submit_btn = gr.Button("Generate", variant="primary", size="lg")
             with gr.Column(scale=1):
                 with gr.Row():
+                    output_canny = gr.Image(label="Canny Edges", type="pil")
+                    output_result = gr.Image(label="Result", type="pil")
         gr.Examples(
             examples=examples,
             inputs=[input_image, prompt, negative_prompt, lora_selection, controlnet_conditioning_scale, steps, seed],
             outputs=[output_canny, output_result],
             fn=generate_controlled_image,
+            cache_examples=False
         )
     submit_btn.click(
         fn=generate_controlled_image,
         inputs=[