Update app.py
app.py CHANGED

@@ -7,24 +7,21 @@ from PIL import Image
 from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, UniPCMultistepScheduler
 
 # -----------------------------------------------------------------------------
-# 1. Model Setup
+# 1. Model Setup
 # -----------------------------------------------------------------------------
 print("Loading models... This might take a minute.")
 
 device = "cuda" if torch.cuda.is_available() else "cpu"
 dtype = torch.float16
 
-#
-# We use Canny because it's the most intuitive for students to understand "Edge Detection"
+# Load ControlNet (Canny)
 controlnet = ControlNetModel.from_pretrained(
     "lllyasviel/sd-controlnet-canny",
     torch_dtype=dtype,
     use_safetensors=True
 )
 
-#
-# SD1.5 is chosen over SDXL here because swapping LoRAs on the fly is much faster
-# and less memory intensive for a live demo.
+# Load Base SD 1.5
 model_id = "stable-diffusion-v1-5/stable-diffusion-v1-5"
 pipe = StableDiffusionControlNetPipeline.from_pretrained(
     model_id,
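
Note on the setup above: `dtype = torch.float16` is set unconditionally even though `device` falls back to `"cpu"`, where fp16 inference is typically unsupported or very slow (and `enable_model_cpu_offload()` itself expects an accelerator). A minimal device-aware sketch; the fp32 fallback on CPU is an assumption, not part of this commit:

    import torch

    # Assumption: only use fp16 when CUDA is actually available.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    dtype = torch.float16 if device == "cuda" else torch.float32
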
@@ -33,7 +30,6 @@ pipe = StableDiffusionControlNetPipeline.from_pretrained(
     use_safetensors=True
 )
 
-# Use a fast scheduler
 pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
 pipe.enable_model_cpu_offload()
 
@@ -44,14 +40,8 @@ print("Base models loaded.")
 # -----------------------------------------------------------------------------
 
 def get_canny_image(image, low_threshold=100, high_threshold=200):
-    """
-    Converts an input image into a Canny edge map.
-    This helps students visualize what the ControlNet actually 'sees'.
-    """
     image = np.array(image)
-    # Convert to grayscale edges
     canny_image = cv2.Canny(image, low_threshold, high_threshold)
-    # Convert back to 3-channel RGB for the model
     canny_image = canny_image[:, :, None]
     canny_image = np.concatenate([canny_image, canny_image, canny_image], axis=2)
     return Image.fromarray(canny_image)
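
The two thresholds are the knobs that matter in `get_canny_image`: they are passed straight to `cv2.Canny`, whose hysteresis keeps more edges at low values and only strong contours at high ones. A quick sanity check of the helper (the input file name is hypothetical):

    from PIL import Image

    img = Image.open("sample.jpg").convert("RGB").resize((512, 512))

    # Low thresholds -> dense edge map; high thresholds -> only strong contours.
    detailed = get_canny_image(img, low_threshold=50, high_threshold=100)
    sparse = get_canny_image(img, low_threshold=200, high_threshold=300)
    detailed.save("canny_detailed.png")
    sparse.save("canny_sparse.png")
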
@@ -60,14 +50,10 @@ def get_canny_image(image, low_threshold=100, high_threshold=200):
 # 3. Inference Logic
 # -----------------------------------------------------------------------------
 
-# Define available LoRAs for the tutorial
-# Format: "Display Name": "HuggingFace_Path"
 LORA_OPTIONS = {
     "None (Base SD1.5)": None,
-    "Lego Style": "minimaxir/sd-1-5-lego-lora",
-    "Claymation Style": "MoShin/clay-style-lora-sd1.5",
-    "Pixel Art": "nerijs/pixel-art-xl", # Note: Some LoRAs might be specific, stick to SD1.5 ones usually
-    # Let's use a reliable Pixel Art for 1.5
+    "Lego Style": "minimaxir/sd-1-5-lego-lora",
+    "Claymation Style": "MoShin/clay-style-lora-sd1.5",
     "Pixel Art (SD1.5)": "ismail/pixel-art-style-lora"
 }
 
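
Dropping "nerijs/pixel-art-xl" is the right call: SDXL LoRAs are not weight-compatible with an SD1.5 UNet. Bad repo ids in this dict are only caught later by the try/except in `generate_controlled_image`; to fail fast at startup instead, a small filter could work — a sketch assuming `huggingface_hub` is installed and its `repo_exists` helper is available:

    from huggingface_hub import repo_exists

    # Assumption: prune unreachable LoRA repos at startup rather than at click time.
    LORA_OPTIONS = {
        name: path for name, path in LORA_OPTIONS.items()
        if path is None or repo_exists(path)
    }
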
@@ -75,7 +61,7 @@ LORA_OPTIONS = {
 def generate_controlled_image(
     input_image,
     prompt,
-    negative_prompt,
+    negative_prompt, # Added this argument
     lora_selection,
     controlnet_conditioning_scale,
     steps,
@@ -84,35 +70,27 @@ def generate_controlled_image(
     if input_image is None:
         raise gr.Error("Please upload an image first!")
 
-    #
-    # We resize to 512x512 for standard SD1.5 inference
+    # Resize for SD1.5
     input_image = input_image.resize((512, 512))
     canny_image = get_canny_image(input_image)
 
-    #
-    # This is the key educational part: Dynamic Adapter Swapping
+    # Manage LoRA
     try:
-        pipe.unload_lora_weights()
-
+        pipe.unload_lora_weights()
         lora_path = LORA_OPTIONS[lora_selection]
         if lora_path:
             print(f"Loading LoRA: {lora_path}")
-            # adapter_name is optional but good practice
             pipe.load_lora_weights(lora_path)
-
     except Exception as e:
         print(f"Error loading LoRA: {e}")
-        # Continue without LoRA if it fails
 
-
-    generator = torch.Generator("cuda").manual_seed(seed)
+    generator = torch.Generator("cuda").manual_seed(int(seed))
 
-    # 4. Inference
     print("Generating...")
     result = pipe(
         prompt=prompt,
         negative_prompt=negative_prompt,
-        image=canny_image,
+        image=canny_image,
         num_inference_steps=steps,
         controlnet_conditioning_scale=float(controlnet_conditioning_scale),
         generator=generator,
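
One wrinkle this hunk keeps: the generator is still pinned to `"cuda"`, so this line raises on CPU-only hardware even though `device` was computed at the top of the file. A device-safe variant (an assumption layered on this commit, reusing the module-level `device`):

    # Assumption: reuse the module-level `device` instead of hardcoding "cuda".
    generator = torch.Generator(device).manual_seed(int(seed))
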
@@ -128,7 +106,8 @@ css = """
 #col-container {max_width: 1200px; margin-left: auto; margin-right: auto;}
 """
 
-with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
+# FIX: Moved theme and css to launch(), removed from Blocks()
+with gr.Blocks() as demo:
 
     with gr.Column(elem_id="col-container"):
         gr.Markdown("# Tutorial: ControlNet + LoRA")
@@ -138,7 +117,6 @@ with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
         )
 
         with gr.Row():
-            # Left Column: Settings
             with gr.Column(scale=1):
                 input_image = gr.Image(label="Input Image (Structure Source)", type="pil")
 
@@ -148,7 +126,13 @@ with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
                     lines=2
                 )
 
-                #
+                # FIX: Added a Negative Prompt component
+                negative_prompt = gr.Textbox(
+                    label="Negative Prompt",
+                    value="blurry, low quality, distorted, ugly, bad anatomy",
+                    lines=1
+                )
+
                 lora_selection = gr.Dropdown(
                     label="Select LoRA Style",
                     choices=list(LORA_OPTIONS.keys()),
@@ -156,7 +140,6 @@ with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
                     info="LoRA changes the artistic style without changing the model architecture."
                 )
 
-                # ControlNet Settings
                 with gr.Accordion("Control & Inference Settings", open=True):
                     controlnet_conditioning_scale = gr.Slider(
                         label="ControlNet Strength",
@@ -168,25 +151,26 @@ with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
 
                 submit_btn = gr.Button("Generate", variant="primary")
 
-            # Right Column: Results
             with gr.Column(scale=1):
                 with gr.Row():
                     output_canny = gr.Image(label="Detected Edges (ControlNet Input)", type="pil")
                     output_result = gr.Image(label="Final Generated Image", type="pil")
 
+    # FIX: inputs list now contains only Gradio components
     submit_btn.click(
         fn=generate_controlled_image,
         inputs=[
-            input_image,
-
+            input_image,
+            prompt,
+            negative_prompt, # Passed the component variable here
+            lora_selection,
+            controlnet_conditioning_scale,
+            steps,
+            seed
         ],
         outputs=[output_canny, output_result]
     )
 
-    # Examples are crucial for tutorials
-    # Note: You would need to host a local image or use a URL for the example to work perfectly in Spaces
-    # But here is the structure:
-    # gr.Examples(...)
-
 if __name__ == "__main__":
-    demo.launch()
+    # FIX: Passed theme and css here
+    demo.launch(theme=gr.themes.Soft(), css=css)
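
A caveat on the final FIX: in the Gradio releases I know of, `Blocks.launch()` does not accept `theme` or `css` keyword arguments; both belong on the `gr.Blocks(...)` constructor, which is what the pre-commit code did. Worth verifying against the Space's pinned Gradio version. The conventional form, for comparison:

    with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
        ...

    if __name__ == "__main__":
        demo.launch()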