ImageGeneration_SD-XL-1.0_MultiControlNet

Runtime error

App Files Files Community

ar0551 committed on Apr 24, 2025

Commit

4601988

verified ·

1 Parent(s): 3f3dcbc

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -74

app.py CHANGED Viewed

@@ -7,18 +7,23 @@ from PIL import Image
 import spaces
-# 🌟 Auto-detect device (CPU/GPU)
 device = "cuda"
 precision = torch.float16
-# 🏗️ Load ControlNet model for Canny edge detection
-# xinsir/controlnet-canny-sdxl-1.0
-# diffusers/controlnet-canny-sdxl-1.0
-controlnet = ControlNetModel.from_pretrained(
     "xinsir/controlnet-canny-sdxl-1.0",
     torch_dtype=precision
 )
 # When testing with a different base model, the VAE must be changed as well.
 vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=precision)
@@ -26,68 +31,31 @@ vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype
 eulera_scheduler = EulerAncestralDiscreteScheduler.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler")
 # Stable Diffusion Model with ControlNet
-pipe_cn = StableDiffusionXLControlNetPipeline.from_pretrained(
     "stabilityai/stable-diffusion-xl-base-1.0",
     controlnet=controlnet,
     vae=vae,
     torch_dtype=precision,
     scheduler=eulera_scheduler,
 )
-pipe_cn.to(device)
-# Stable Diffusion Model without ControlNet
-pipe = StableDiffusionXLPipeline.from_pretrained(
-    "stabilityai/stable-diffusion-xl-base-1.0",
-    vae=vae,
-    torch_dtype=precision,
-    scheduler=eulera_scheduler,
-)
-pipe.to(device)
-# 📸 Edge detection function using OpenCV (Canny)
-@spaces.GPU
-def apply_canny(image, low_threshold, high_threshold):
-    image = np.array(image)
-    image = cv2.Canny(image, low_threshold, high_threshold)
-    image = image[:, :, None]
-    image = np.concatenate([image, image, image], axis=2)
-    return Image.fromarray(image)
 # 🎨 Image generation function from image
 @spaces.GPU
-def generate_image(prompt, input_image, low_threshold, high_threshold, strength, guidance, controlnet_conditioning_scale):
-    # Apply edge detection
-    edge_detected = apply_canny(input_image, low_threshold, high_threshold)
     # Generate styled image using ControlNet
-    result = pipe_cn(
         prompt=prompt,
-        image=edge_detected,
         num_inference_steps=30,
         guidance_scale=guidance,
-        controlnet_conditioning_scale=float(controlnet_conditioning_scale),
         strength=strength
     ).images[0]
-    return edge_detected, result
-# 🎨 Image generation function from prompt
-@spaces.GPU
-def generate_prompt(prompt, strength, guidance):
-    # Generate styled image from prompt
-    result = pipe(
-        prompt=prompt,
-        num_inference_steps=30,
-        guidance_scale=guidance,
-        strength=strength
-    ).images[0]
-    return result, result
 # 🖥️ Gradio UI
@@ -96,36 +64,27 @@ with gr.Blocks() as demo:
     with gr.Row():
         with gr.Column():
-            input_image = gr.Image(label="Upload 3D Screenshot", type="pil")
-            prompt = gr.Textbox(label="Style Prompt", placeholder="e.g., Futuristic building in sunset")
-            low_threshold = gr.Slider(50, 150, value=100, label="Canny Edge Low Threshold")
-            high_threshold = gr.Slider(100, 200, value=150, label="Canny Edge High Threshold")
-            strength = gr.Slider(0.1, 1.0, value=0.7, label="Denoising Strength")
-            guidance = gr.Slider(1, 20, value=7.5, label="Guidance Scale (Creativity)")
-            controlnet_conditioning_scale = gr.Slider(0, 1, value=0.5, step=0.01, label="ControlNet Conditioning Scale")
-            with gr.Row():
-                generate_img_button = gr.Button("Generate from Image")
-                generate_prompt_button = gr.Button("Generate from Prompt")
-        with gr.Column():
-            edge_output = gr.Image(label="Edge Detected Image")
-            result_output = gr.Image(label="Generated Styled Image")
     # 🔗 Generate Button Action
     generate_img_button.click(
         fn=generate_image,
-        inputs=[prompt, input_image, low_threshold, high_threshold, strength, guidance, controlnet_conditioning_scale],
-        outputs=[edge_output, result_output]
-    )
-    generate_prompt_button.click(
-        fn=generate_prompt,
-        inputs=[prompt, strength, guidance],
-        outputs=[edge_output, result_output]
     )

 import spaces
+# 🌟 set device and precision
 device = "cuda"
 precision = torch.float16
+# 🏗️ Load the ControlNet models for Canny and Depth
+controlnet_canny = ControlNetModel.from_pretrained(
     "xinsir/controlnet-canny-sdxl-1.0",
     torch_dtype=precision
 )
+controlnet_depth = ControlNetModel.from_pretrained(
+    "xinsir/controlnet-depth-sdxl-1.0",
+    torch_dtype=precision
+)
+controlnet = [controlnet_canny, controlnet_depth]
 # When testing with a different base model, the VAE must be changed as well.
 vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=precision)
 eulera_scheduler = EulerAncestralDiscreteScheduler.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler")
 # Stable Diffusion Model with ControlNet
+pipe_canny_depth = StableDiffusionXLControlNetPipeline.from_pretrained(
     "stabilityai/stable-diffusion-xl-base-1.0",
     controlnet=controlnet,
     vae=vae,
     torch_dtype=precision,
     scheduler=eulera_scheduler,
 )
+pipe_canny_depth.to(device)
 # 🎨 Image generation function from image
 @spaces.GPU
+def generate_image(prompt, canny_input, depth_input, strength, guidance, canny_conditioning_scale, depth_conditioning_scale):
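     # Generate styled image using ControlNet. NOTE(review): `strength` is an img2img-style argument; this text-to-image ControlNet pipeline presumably ignores it — confirm.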
+    result = pipe_canny_depth(
         prompt=prompt,
+        image=[canny_input, depth_input],
         num_inference_steps=30,
         guidance_scale=guidance,
+        controlnet_conditioning_scale=[float(canny_conditioning_scale), float(depth_conditioning_scale)],
         strength=strength
     ).images[0]
+    return result
 # 🖥️ Gradio UI
     with gr.Row():
         with gr.Column():
+            canny_input = gr.Image(label="Upload Canny Screenshot", type="pil")
+            canny_conditioning_scale = gr.Slider(0, 1, value=0.5, step=0.01, label="Canny Conditioning Scale")
+        with gr.Column():
+            depth_input = gr.Image(label="Upload Depth (ZBuffer) Screenshot", type="pil")
+            depth_conditioning_scale = gr.Slider(0, 1, value=0.5, step=0.01, label="Depth Conditioning Scale")
+    with gr.Row():
+        prompt = gr.Textbox(label="Style Prompt", placeholder="e.g., Futuristic building in sunset")
+        strength = gr.Slider(0.1, 1.0, value=0.7, label="Denoising Strength")
+        guidance = gr.Slider(1, 20, value=7.5, label="Guidance Scale (Creativity)")
+        generate_img_button = gr.Button("Generate from Image")
+    with gr.Row():
+        result_output = gr.Image(label="Generated Styled Image")
     # 🔗 Generate Button Action
     generate_img_button.click(
         fn=generate_image,
+        inputs=[prompt, canny_input, depth_input, strength, guidance, canny_conditioning_scale, depth_conditioning_scale],
+        outputs=[result_output]
     )