| import gradio as gr |
| from diffusers import ControlNetModel, StableDiffusionXLPipeline, StableDiffusionXLControlNetPipeline, AutoencoderKL, EulerAncestralDiscreteScheduler |
| import torch |
| import numpy as np |
| import cv2 |
| from PIL import Image |
| import spaces |
|
|
|
|
| |
# ---------------------------------------------------------------------------
# Model setup (executed once, at import time)
# ---------------------------------------------------------------------------
# Everything below is loaded in half precision and moved to the GPU up front.
device = "cuda"
precision = torch.float16

# ControlNet checkpoint used to condition generation on a Canny edge map.
controlnet = ControlNetModel.from_pretrained(
    "xinsir/controlnet-canny-sdxl-1.0",
    torch_dtype=precision,
)

# Stand-alone VAE that replaces the one bundled with the base checkpoint.
# NOTE(review): the repo name suggests this is the fp16-safe SDXL VAE - confirm.
vae = AutoencoderKL.from_pretrained(
    "madebyollin/sdxl-vae-fp16-fix",
    torch_dtype=precision,
)

# Euler-ancestral scheduler built from the base model's scheduler config.
eulera_scheduler = EulerAncestralDiscreteScheduler.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    subfolder="scheduler",
)

# Edge-conditioned pipeline: prompt + control image -> image.
pipe_cn = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    vae=vae,
    torch_dtype=precision,
    scheduler=eulera_scheduler,
)
pipe_cn.to(device)

# Prompt-only pipeline: prompt -> image.
# It is assembled from the modules `pipe_cn` already holds rather than calling
# `from_pretrained` on the same checkpoint a second time, so the UNet, text
# encoders and tokenizers exist only once in memory. The VAE and scheduler
# were already shared between the two pipelines.
pipe = StableDiffusionXLPipeline(
    vae=pipe_cn.vae,
    text_encoder=pipe_cn.text_encoder,
    text_encoder_2=pipe_cn.text_encoder_2,
    tokenizer=pipe_cn.tokenizer,
    tokenizer_2=pipe_cn.tokenizer_2,
    unet=pipe_cn.unet,
    scheduler=pipe_cn.scheduler,
)
# The shared modules are already on `device`; the call is kept so `pipe` is
# explicitly placed like `pipe_cn`.
pipe.to(device)
|
|
|
|
| |
@spaces.GPU
def apply_canny(image, low_threshold, high_threshold):
    """Return the Canny edge map of ``image`` as a three-channel PIL image.

    Args:
        image: Input converted with ``np.array`` before edge detection
            (the UI passes a PIL image).
        low_threshold: First threshold forwarded to ``cv2.Canny``.
        high_threshold: Second threshold forwarded to ``cv2.Canny``.

    Returns:
        PIL.Image.Image: The edge map, with its single channel copied into
        three identical channels along the last axis.
    """
    pixels = np.array(image)
    edges = cv2.Canny(pixels, low_threshold, high_threshold)
    # cv2.Canny yields a 2-D map; stack it depth-wise three times so the
    # resulting image has three channels.
    return Image.fromarray(np.dstack((edges, edges, edges)))
|
|
|
|
| |
@spaces.GPU
def generate_image(prompt, input_image, low_threshold, high_threshold, strength, guidance, controlnet_conditioning_scale):
    """Generate a styled render that follows the Canny edges of ``input_image``.

    Args:
        prompt: Text prompt passed to the ControlNet pipeline.
        input_image: Uploaded image (the UI supplies a PIL image, or ``None``
            when nothing has been uploaded).
        low_threshold: First Canny threshold, forwarded to ``apply_canny``.
        high_threshold: Second Canny threshold, forwarded to ``apply_canny``.
        strength: Accepted only so the existing UI wiring keeps working; it is
            not forwarded because the text-to-image ControlNet pipeline's
            ``__call__`` has no ``strength`` argument.
        guidance: Value passed as ``guidance_scale``.
        controlnet_conditioning_scale: Weight of the ControlNet conditioning;
            coerced to ``float`` before being passed on.

    Returns:
        tuple: ``(edge_detected, result)`` - the edge map that was used as the
        control image and the first generated image.

    Raises:
        gr.Error: If no input image was provided.
    """
    # Without this guard a missing upload surfaces as an opaque OpenCV error.
    if input_image is None:
        raise gr.Error("Please upload an image before generating.")

    edge_detected = apply_canny(input_image, low_threshold, high_threshold)

    result = pipe_cn(
        prompt=prompt,
        image=edge_detected,
        num_inference_steps=30,
        guidance_scale=guidance,
        controlnet_conditioning_scale=float(controlnet_conditioning_scale),
    ).images[0]

    return edge_detected, result
|
|
|
|
| |
@spaces.GPU
def generate_prompt(prompt, strength, guidance):
    """Generate an image from the text prompt alone (no ControlNet).

    Args:
        prompt: Text prompt passed to the SDXL pipeline.
        strength: Accepted only so the existing UI wiring keeps working; it is
            not forwarded because the text-to-image SDXL pipeline's
            ``__call__`` has no ``strength`` argument.
        guidance: Value passed as ``guidance_scale``.

    Returns:
        tuple: ``(result, result)`` - the same generated image twice, because
        the click handler is wired to two output components.
    """
    result = pipe(
        prompt=prompt,
        num_inference_steps=30,
        guidance_scale=guidance,
    ).images[0]

    # NOTE(review): the first slot feeds the "Edge Detected Image" panel, which
    # therefore shows the generated image on this path - confirm that is intended.
    return result, result
|
|
|
|
| |
# ---------------------------------------------------------------------------
# Gradio front-end: controls in the left column, results in the right column.
# ---------------------------------------------------------------------------
with gr.Blocks() as demo:
    # NOTE(review): the characters right after "#" in this heading look like a
    # mis-encoded emoji - confirm against the original file encoding.
    gr.Markdown("# ποΈ 3D Screenshot to Styled Render with ControlNet")

    with gr.Row():
        with gr.Column():
            # Source image and style prompt.
            input_image = gr.Image(label="Upload 3D Screenshot", type="pil")
            prompt = gr.Textbox(
                label="Style Prompt",
                placeholder="e.g., Futuristic building in sunset",
            )

            # Canny edge-detection thresholds.
            low_threshold = gr.Slider(
                minimum=50,
                maximum=150,
                value=100,
                label="Canny Edge Low Threshold",
            )
            high_threshold = gr.Slider(
                minimum=100,
                maximum=200,
                value=150,
                label="Canny Edge High Threshold",
            )

            # Generation controls.
            strength = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.7,
                label="Denoising Strength",
            )
            guidance = gr.Slider(
                minimum=1,
                maximum=20,
                value=7.5,
                label="Guidance Scale (Creativity)",
            )
            controlnet_conditioning_scale = gr.Slider(
                minimum=0,
                maximum=1,
                value=0.5,
                step=0.01,
                label="ControlNet Conditioning Scale",
            )

            # One button per generation path.
            with gr.Row():
                generate_img_button = gr.Button("Generate from Image")
                generate_prompt_button = gr.Button("Generate from Prompt")

        with gr.Column():
            edge_output = gr.Image(label="Edge Detected Image")
            result_output = gr.Image(label="Generated Styled Image")

    # Image path: edge-detect the upload, then run the ControlNet pipeline.
    generate_img_button.click(
        fn=generate_image,
        inputs=[
            prompt,
            input_image,
            low_threshold,
            high_threshold,
            strength,
            guidance,
            controlnet_conditioning_scale,
        ],
        outputs=[edge_output, result_output],
    )

    # Prompt-only path: run the plain SDXL pipeline.
    generate_prompt_button.click(
        fn=generate_prompt,
        inputs=[prompt, strength, guidance],
        outputs=[edge_output, result_output],
    )

# Start the app and request a public share link.
demo.launch(share=True)