Spaces:

mvp-lab
/

70113_ImgGen_Diffusion_ControlNetxLoRA

Sleeping

App Files Files Community

oliveryanzuolu committed on 14 days ago

Commit

834f6ad

verified ·

1 Parent(s): d6df1df

Update app.py

Browse files

Files changed (1) hide show

app.py +63 -74

app.py CHANGED Viewed

@@ -7,64 +7,77 @@ from PIL import Image
 import os
 # Diffusers and ControlNet imports
-from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, UniPCMultistepScheduler
 # -----------------------------------------------------------------------------
-# 1. Configuration & Registry
 # -----------------------------------------------------------------------------
 LORA_REGISTRY = {
-    "None (Base SD1.5)": {
         "repo": None,
         "trigger": "",
         "weight": 0.0
     },
-    "Lego Style": {
         "repo": "lordjia/lelo-lego-lora-for-xl-sd1-5",
         "trigger": "LEGO Creator, LEGO MiniFig, ",
         "weight": 0.8,
         "file": "Lego_XL_v2.1.safetensors"
     },
-    "Claymation Style": {
         "repo": "DoctorDiffusion/doctor-diffusion-s-claymation-style-lora",
         "trigger": "made-of-clay, claymation style, ",
-        "weight": 1.0
     },
-    "Pixel Art (SD1.5)": {
-        "repo": "artificialguybr/pixelartredmond-1-5v-pixel-art-loras-for-sd-1-5",
-        "trigger": "pixel art, PixArFK, ",
-        "weight": 1.0
     }
 }
 # -----------------------------------------------------------------------------
 # 2. Model Initialization
 # -----------------------------------------------------------------------------
-print("Initializing Inference Pipeline...")
 device = "cuda" if torch.cuda.is_available() else "cpu"
 dtype = torch.float16 if device == "cuda" else torch.float32
-# Load ControlNet
 controlnet = ControlNetModel.from_pretrained(
-    "lllyasviel/sd-controlnet-canny",
     torch_dtype=dtype,
     use_safetensors=True
 )
-# Load Base Stable Diffusion 1.5
-pipe = StableDiffusionControlNetPipeline.from_pretrained(
-    "stable-diffusion-v1-5/stable-diffusion-v1-5",
     controlnet=controlnet,
     torch_dtype=dtype,
     use_safetensors=True
 )
 pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
-if device == "cuda":
     pipe.to(device)
-print("Base Pipeline Loaded Successfully.")
 # -----------------------------------------------------------------------------
 # 3. Computer Vision Helper Functions
@@ -94,8 +107,8 @@ def generate_controlled_image(
     if input_image is None:
         raise gr.Error("Validation Error: Please upload an image first!")
-    # 1. Preprocess Image
-    width, height = 512, 512
     input_image = input_image.resize((width, height))
     canny_image = get_canny_image(input_image)
@@ -106,13 +119,18 @@ def generate_controlled_image(
     repo_id = style_config["repo"]
     trigger_text = style_config["trigger"]
     lora_weight = style_config["weight"]
     final_prompt = f"{trigger_text}{prompt}"
     try:
         if repo_id:
             print(f"Loading LoRA: {repo_id}")
-            pipe.load_lora_weights(repo_id)
             pipe.fuse_lora(lora_scale=lora_weight)
             print("LoRA fused successfully.")
@@ -132,7 +150,7 @@ def generate_controlled_image(
             image=canny_image,
             num_inference_steps=int(steps),
             controlnet_conditioning_scale=float(controlnet_conditioning_scale),
-            guidance_scale=7.5,
             generator=generator,
         ).images
     except Exception as e:
@@ -159,77 +177,48 @@ css = """
 .guide-text {font-size: 1.1em; color: #4a5568;}
 """
-# Example Data (Using resolve URLs)
 examples = [
     [
         "https://huggingface.co/takuma104/controlnet_dev/resolve/main/gen_compare/control_images/converted/control_bird_canny.png",
-        "a colorful exotic bird sitting on a branch, detailed feathers, masterpiece",
-        "blurry, low quality, deformed",
-        "None (Base SD1.5)",
         1.0, 30, 42
     ],
     [
         "https://huggingface.co/takuma104/controlnet_dev/resolve/main/gen_compare/control_images/converted/control_vermeer_depth.png",
-        "portrait of a girl with a pearl earring, made of plastic blocks, interlocking bricks, toy aesthetic",
-        "human skin, realistic, painting, blurry",
-        "Lego Style",
         0.8, 30, 101
     ],
     [
         "https://huggingface.co/takuma104/controlnet_dev/resolve/main/gen_compare/control_images/converted/control_bird_hed.png",
-        "a cute bird, isometric view, retro game asset, 8-bit graphics",
-        "photorealistic, vector, high resolution, smooth",
-        "Pixel Art (SD1.5)",
         1.0, 30, 202
     ],
     [
         "https://huggingface.co/takuma104/controlnet_dev/resolve/main/gen_compare/control_images/converted/control_room_mlsd.png",
-        "interior of a modern living room, stop motion animation, plasticine texture, fingerprint textures",
         "cgi, 3d render, glossy, architectural visualization",
-        "Claymation Style",
         1.0, 30, 303
     ],
-    [
-        "https://huggingface.co/takuma104/controlnet_dev/resolve/main/gen_compare/control_images/converted/control_human_normal.png",
-        "a man in a suit walking, built with lego bricks, 3d render, studio lighting",
-        "flesh, organic, fabric, realistic face",
-        "Lego Style",
-        0.9, 30, 404
-    ],
-    [
-        "https://huggingface.co/takuma104/controlnet_dev/resolve/main/gen_compare/control_images/converted/control_human_openpose.png",
-        "a fashion model posing, pixelated style, 16-bit color palette, arcade style",
-        "blur, anti-aliasing, photograph",
-        "Pixel Art (SD1.5)",
-        1.0, 30, 505
-    ],
-    [
-        "https://huggingface.co/takuma104/controlnet_dev/resolve/main/gen_compare/control_images/converted/control_vermeer_scribble.png",
-        "classic portrait painting, whimsical clay character, soft lighting, play-doh style",
-        "oil painting, canvas texture, flat",
-        "Claymation Style",
-        1.0, 30, 606
-    ],
-    [
-        "https://huggingface.co/takuma104/controlnet_dev/resolve/main/gen_compare/control_images/converted/control_room_seg.png",
-        "empty room interior, white walls, wooden floor, high quality, photorealistic",
-        "furniture, clutter, messy, low res",
-        "None (Base SD1.5)",
-        0.8, 30, 707
-    ]
 ]
 with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
     with gr.Column(elem_id="col-container"):
-        gr.Markdown("# 🎨 ControlNet + LoRA Style Mixer")
         gr.Markdown(
             """
             <p class='guide-text'>
-            <b>Transform Structure into Style.</b><br>
-            This pipeline uses <b>ControlNet (Canny)</b> to lock the edges of your image,
-            and <b>LoRA</b> adapters to completely repaint the texture.
-            Select a preset below to see the magic.
             </p>
             """
         )
@@ -248,22 +237,22 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
                 negative_prompt = gr.Textbox(
                     label="Negative Prompt",
-                    value="blurry, low quality, distorted, ugly, bad anatomy, watermark",
                     lines=1
                 )
                 lora_selection = gr.Dropdown(
                     label="Select LoRA Style",
                     choices=list(LORA_REGISTRY.keys()),
-                    value="None (Base SD1.5)",
                     info="Automatically injects trigger words and loads weights."
                 )
                 with gr.Accordion("⚙️ Advanced Settings", open=False):
                     controlnet_conditioning_scale = gr.Slider(
-                        label="ControlNet Strength (Edge Fidelity)",
-                        minimum=0.0, maximum=2.0, value=1.0, step=0.1,
-                        info="1.0 = Follow edges strictly. Lower for more 'creative' interpretation."
                     )
                     steps = gr.Slider(label="Inference Steps", minimum=10, maximum=50, value=30, step=1)
                     seed = gr.Number(label="Seed", value=42, precision=0)
@@ -273,7 +262,7 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
             # Right Column: Outputs
             with gr.Column(scale=1):
                 with gr.Row():
-                    output_canny = gr.Image(label="Detected Edges (ControlNet Sees This)", type="pil")
                     output_result = gr.Image(label="Final Stylized Image", type="pil")
         # Examples Section
@@ -283,7 +272,7 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
             inputs=[input_image, prompt, negative_prompt, lora_selection, controlnet_conditioning_scale, steps, seed],
             outputs=[output_canny, output_result],
             fn=generate_controlled_image,
-            cache_examples=False # CRITICAL FIX: Set to False to prevent async loop errors
         )
     # Event Wiring

 import os
 # Diffusers and ControlNet imports
+from diffusers import StableDiffusionXLControlNetPipeline, ControlNetModel, AutoencoderKL, UniPCMultistepScheduler
 # -----------------------------------------------------------------------------
+# 1. Configuration & Registry (SDXL Version)
 # -----------------------------------------------------------------------------
 LORA_REGISTRY = {
+    "None (Base SDXL)": {
         "repo": None,
         "trigger": "",
         "weight": 0.0
     },
+    "Lego Style XL": {
         "repo": "lordjia/lelo-lego-lora-for-xl-sd1-5",
         "trigger": "LEGO Creator, LEGO MiniFig, ",
         "weight": 0.8,
         "file": "Lego_XL_v2.1.safetensors"
     },
+    "Claymation Style XL": {
         "repo": "DoctorDiffusion/doctor-diffusion-s-claymation-style-lora",
         "trigger": "made-of-clay, claymation style, ",
+        "weight": 0.9,
+        "file": "DD-made-of-clay-XL-v2.safetensors"
     },
+    "Pixel Art XL": {
+        "repo": "nerijs/pixel-art-xl",
+        "trigger": "pixel art, ",
+        "weight": 1.0,
+        "file": "pixel-art-xl.safetensors"
     }
 }
 # -----------------------------------------------------------------------------
 # 2. Model Initialization
 # -----------------------------------------------------------------------------
+print("Initializing SDXL Inference Pipeline...")
 device = "cuda" if torch.cuda.is_available() else "cpu"
 dtype = torch.float16 if device == "cuda" else torch.float32
+# 1. Load VAE (Critical for SDXL fp16 stability)
+vae = AutoencoderKL.from_pretrained(
+    "madebyollin/sdxl-vae-fp16-fix",
+    torch_dtype=dtype
+)
+# 2. Load ControlNet (Must be SDXL version)
 controlnet = ControlNetModel.from_pretrained(
+    "diffusers/controlnet-canny-sdxl-1.0",
     torch_dtype=dtype,
     use_safetensors=True
 )
+# 3. Load Base SDXL
+pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
+    "stabilityai/stable-diffusion-xl-base-1.0",
     controlnet=controlnet,
+    vae=vae,
     torch_dtype=dtype,
     use_safetensors=True
 )
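+# Optimization: switch to the UniPC scheduler, then try model CPU offload (falls back to pipe.to(device) on failure)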
 pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
+try:
+    pipe.enable_model_cpu_offload()
+except Exception as e:
+    print(f"Warning: CPU offload failed, moving to device manually. {e}")
     pipe.to(device)
+print("SDXL Pipeline Loaded Successfully.")
 # -----------------------------------------------------------------------------
 # 3. Computer Vision Helper Functions
     if input_image is None:
         raise gr.Error("Validation Error: Please upload an image first!")
+    # 1. Preprocess Image (SDXL works best at 1024x1024)
+    width, height = 1024, 1024
     input_image = input_image.resize((width, height))
     canny_image = get_canny_image(input_image)
     repo_id = style_config["repo"]
     trigger_text = style_config["trigger"]
     lora_weight = style_config["weight"]
+    lora_file = style_config.get("file", None)
     final_prompt = f"{trigger_text}{prompt}"
     try:
         if repo_id:
             print(f"Loading LoRA: {repo_id}")
+            if lora_file:
+                pipe.load_lora_weights(repo_id, weight_name=lora_file)
+            else:
+                pipe.load_lora_weights(repo_id)
             pipe.fuse_lora(lora_scale=lora_weight)
             print("LoRA fused successfully.")
             image=canny_image,
             num_inference_steps=int(steps),
             controlnet_conditioning_scale=float(controlnet_conditioning_scale),
+            guidance_scale=7.0, # SDXL usually prefers slightly lower CFG than SD1.5
             generator=generator,
         ).images
     except Exception as e:
 .guide-text {font-size: 1.1em; color: #4a5568;}
 """
+# Example Data (Updated for SDXL context)
 examples = [
     [
         "https://huggingface.co/takuma104/controlnet_dev/resolve/main/gen_compare/control_images/converted/control_bird_canny.png",
+        "a colorful exotic bird sitting on a branch, detailed feathers, masterpiece, 8k",
+        "blurry, low quality, deformed, illustration",
+        "None (Base SDXL)",
         1.0, 30, 42
     ],
     [
         "https://huggingface.co/takuma104/controlnet_dev/resolve/main/gen_compare/control_images/converted/control_vermeer_depth.png",
+        "portrait of a girl with a pearl earring, made of plastic blocks, interlocking bricks, toy aesthetic, macro photography",
+        "human skin, realistic, painting, blurry, drawing",
+        "Lego Style XL",
         0.8, 30, 101
     ],
     [
         "https://huggingface.co/takuma104/controlnet_dev/resolve/main/gen_compare/control_images/converted/control_bird_hed.png",
+        "pixel art, a cute bird, isometric view, retro game asset, 8-bit graphics",
+        "photorealistic, vector, high resolution, smooth, 3d render",
+        "Pixel Art XL",
         1.0, 30, 202
     ],
     [
         "https://huggingface.co/takuma104/controlnet_dev/resolve/main/gen_compare/control_images/converted/control_room_mlsd.png",
+        "made-of-clay, claymation style, interior of a modern living room, stop motion animation, plasticine texture, fingerprint textures",
         "cgi, 3d render, glossy, architectural visualization",
+        "Claymation Style XL",
         1.0, 30, 303
     ],
 ]
 with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
     with gr.Column(elem_id="col-container"):
+        gr.Markdown("# 🎨 SDXL ControlNet + LoRA Mixer")
         gr.Markdown(
             """
             <p class='guide-text'>
+            <b>SDXL Edition.</b><br>
+            Higher resolution, better prompt adherence, and native LoRA support.
+            Uses <b>ControlNet Canny (SDXL)</b> for structure.
             </p>
             """
         )
                 negative_prompt = gr.Textbox(
                     label="Negative Prompt",
+                    value="blurry, low quality, distorted, ugly, bad anatomy, watermark, text",
                     lines=1
                 )
                 lora_selection = gr.Dropdown(
                     label="Select LoRA Style",
                     choices=list(LORA_REGISTRY.keys()),
+                    value="None (Base SDXL)",
                     info="Automatically injects trigger words and loads weights."
                 )
                 with gr.Accordion("⚙️ Advanced Settings", open=False):
                     controlnet_conditioning_scale = gr.Slider(
+                        label="ControlNet Strength",
+                        minimum=0.0, maximum=1.5, value=0.8, step=0.1,
+                        info="SDXL ControlNet is strong. 0.8 is usually a good sweet spot."
                     )
                     steps = gr.Slider(label="Inference Steps", minimum=10, maximum=50, value=30, step=1)
                     seed = gr.Number(label="Seed", value=42, precision=0)
             # Right Column: Outputs
             with gr.Column(scale=1):
                 with gr.Row():
+                    output_canny = gr.Image(label="Detected Edges", type="pil")
                     output_result = gr.Image(label="Final Stylized Image", type="pil")
         # Examples Section
             inputs=[input_image, prompt, negative_prompt, lora_selection, controlnet_conditioning_scale, steps, seed],
             outputs=[output_canny, output_result],
             fn=generate_controlled_image,
+            cache_examples=False # Keep False for stability
         )
     # Event Wiring