ICGenAIShare07 commited on
Commit
1e038da
·
verified ·
1 Parent(s): 2880446

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +213 -0
app.py ADDED
@@ -0,0 +1,213 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Module-level setup: load SD 1.5 components, attach trained ControlNet
# weights downloaded from the Hub, and build the inference pipeline once
# at import time (standard pattern for a Gradio Space).
import os  # NOTE(review): unused in the visible code — kept for safety; confirm before removing
import gradio as gr
import torch
import spaces

from PIL import Image
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file

from diffusers import ControlNetModel
from transformers import CLIPTextModel, CLIPTokenizer
from diffusers.models.autoencoders.autoencoder_kl import AutoencoderKL
from diffusers.models.unets.unet_2d_condition import UNet2DConditionModel

# Local files you must upload to the Space (same folder as this app.py)
from pipeline import build_controlnet_pipe
from prepare_laion import CannyCFG, canny_auto_median_bilateral

# -----------------------------
# Config
# -----------------------------
BASE_MODEL = "sd-legacy/stable-diffusion-v1-5"
WEIGHTS_REPO = "mvp-lab/ControlNet_Weight"
WEIGHTS_FILENAME = "diffusion_pytorch_model_1.safetensors"

# Download (cached) and get the local path to the ControlNet weight file.
CONTROLNET_PATH = hf_hub_download(
    repo_id=WEIGHTS_REPO,
    filename=WEIGHTS_FILENAME,
    repo_type="model"
)

# For ZeroGPU, keep dtype float32 for safety/compatibility.
DTYPE = torch.float32
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the frozen SD 1.5 sub-modules individually so the ControlNet can be
# derived from the exact UNet architecture below.
vae = AutoencoderKL.from_pretrained(BASE_MODEL, subfolder="vae", torch_dtype=DTYPE)
unet = UNet2DConditionModel.from_pretrained(BASE_MODEL, subfolder="unet", torch_dtype=DTYPE)
tokenizer = CLIPTokenizer.from_pretrained(BASE_MODEL, subfolder="tokenizer")
text_encoder = CLIPTextModel.from_pretrained(BASE_MODEL, subfolder="text_encoder", torch_dtype=DTYPE)

# Inference only: freeze all base-model parameters.
vae.requires_grad_(False)
unet.requires_grad_(False)
text_encoder.requires_grad_(False)

# Build a ControlNet matching the UNet (3-channel conditioning = RGB edge map)
# and load the fine-tuned weights. strict=False tolerates key mismatches;
# the counts are printed so a bad checkpoint is visible in the Space logs.
controlnet = ControlNetModel.from_unet(unet, conditioning_channels=3)
state = load_file(CONTROLNET_PATH)
missing, unexpected = controlnet.load_state_dict(state, strict=False)
print(f"[ControlNet] missing={len(missing)}, unexpected={len(unexpected)}")

# Assemble the full text+ControlNet pipeline (helper defined in pipeline.py).
pipe = build_controlnet_pipe(
    base_model_name=BASE_MODEL,
    controlnet=controlnet,
    vae=vae,
    unet=unet,
    text_encoder=text_encoder,
    tokenizer=tokenizer,
    device=DEVICE,
    weight_dtype=DTYPE,
    use_unipc=True,
)
@torch.inference_mode()
def run_pipeline(
    input_image: Image.Image,
    prompt: str,
    negative_prompt: str = "",
    guidance_scale: float = 7.5,
    num_inference_steps: int = 50,
    num_images: int = 1,
    controlnet_conditioning_scale: float = 1.0,
    resolution: int = 512,
    return_canny: bool = False,
    seed: int = 0,
):
    """Run Canny-conditioned ControlNet generation on a single input image.

    Args:
        input_image: Source image; converted to RGB and resized to
            ``resolution`` x ``resolution`` before edge extraction.
        prompt: Positive text prompt, replicated across all samples.
        negative_prompt: Negative text prompt, replicated across all samples.
        guidance_scale: Classifier-free guidance scale.
        num_inference_steps: Number of denoising steps.
        num_images: Number of samples to generate (must be >= 1).
        controlnet_conditioning_scale: Strength of the ControlNet conditioning.
        resolution: Square output size in pixels.
        return_canny: If True, also return the Canny conditioning image.
        seed: Base RNG seed; sample ``i`` uses ``seed + i``. The default of 0
            reproduces the previous fixed seeding (0, 1, ..., num_images-1).

    Returns:
        A list of PIL images, or ``(images, canny)`` when ``return_canny``.

    Raises:
        ValueError: If ``input_image`` is None or ``num_images`` < 1.
    """
    if input_image is None:
        raise ValueError("input_image is None")
    if num_images < 1:
        raise ValueError("num_images must be >= 1")

    # Resize input to the target square resolution.
    img_rgb = input_image.convert("RGB").resize((resolution, resolution))

    # Compute the Canny conditioning image (RGB) with the same parameters
    # used when preparing the training data.
    canny_cfg = CannyCFG(sigma=0.33, d=7, sigma_color=50, sigma_space=50)
    canny = canny_auto_median_bilateral(img_rgb, canny_cfg).convert("RGB")

    # One generator per sample so results are deterministic yet distinct.
    generators = [
        torch.Generator(device=DEVICE).manual_seed(seed + i)
        for i in range(num_images)
    ]

    images = pipe(
        prompt=[prompt] * num_images,
        negative_prompt=[negative_prompt] * num_images,
        image=[canny] * num_images,
        num_inference_steps=int(num_inference_steps),
        guidance_scale=float(guidance_scale),
        height=int(resolution),
        width=int(resolution),
        generator=generators,
        controlnet_conditioning_scale=float(controlnet_conditioning_scale),
    ).images

    if return_canny:
        return images, canny
    return images
104
+
105
+ @spaces.GPU
106
+ def generate_image(
107
+ input_image,
108
+ positive_prompt,
109
+ negative_prompt,
110
+ guidance_scale,
111
+ num_inference_steps,
112
+ num_images,
113
+ controlnet_conditioning_scale,
114
+ resolution,
115
+ ):
116
+
117
+ if input_image is None:
118
+ raise gr.Error("Please upload an input image.")
119
+
120
+ # If Gradio passes numpy, convert defensively (even though type="pil" should give PIL)
121
+ if not isinstance(input_image, Image.Image):
122
+ input_image = Image.fromarray(input_image)
123
+
124
+ imgs, canny = run_pipeline(
125
+ input_image=input_image,
126
+ prompt=positive_prompt,
127
+ negative_prompt=negative_prompt,
128
+ guidance_scale=float(guidance_scale),
129
+ num_inference_steps=int(num_inference_steps),
130
+ num_images=int(num_images),
131
+ controlnet_conditioning_scale=float(controlnet_conditioning_scale),
132
+ resolution=int(resolution),
133
+ return_canny=True,
134
+ )
135
+ return canny, imgs
136
+
# ----------- demo -----------
# Two-column layout: inputs/controls on the left, Canny preview and the
# generated-image gallery on the right.
with gr.Blocks() as demo:
    gr.Markdown("## ControlNet (Canny) Demo")
    gr.Markdown("Upload an image and write prompt(s). The model generates images conditioned on Canny edges.")

    with gr.Row():
        with gr.Column():
            input_image = gr.Image(
                label="Input Image",
                type="pil",
                image_mode="RGB",
            )

            positive_prompt = gr.Textbox(
                label="Positive Prompt",
                value="",
                lines=2,
                placeholder="Brief description of image",
            )

            negative_prompt = gr.Textbox(
                label="Negative Prompt",
                value="",
                lines=2,
                # Fixed grammar in the user-facing hint ("an blurry" -> "a blurry").
                placeholder="e.g., a blurry image with deformed structure",
            )

            with gr.Row():
                guidance_scale = gr.Slider(
                    label="Guidance Scale",
                    minimum=1.0, maximum=15.0, value=7.5, step=0.1
                )
                num_inference_steps = gr.Slider(
                    label="Steps",
                    minimum=10, maximum=80, value=50, step=1
                )

            with gr.Row():
                num_images = gr.Slider(
                    label="Number of Images",
                    minimum=1, maximum=6, value=1, step=1
                )
                controlnet_conditioning_scale = gr.Slider(
                    label="ControlNet Conditioning Scale",
                    minimum=0.0, maximum=2.0, value=1.0, step=0.05
                )

            resolution = gr.Dropdown(
                label="Resolution",
                choices=[256, 384, 512, 640, 768, 1024],
                value=512,
            )

            run_btn = gr.Button("Generate", variant="primary")

        with gr.Column(scale=1):
            # Edge map shown alongside results so users can see the conditioning.
            canny_preview = gr.Image(label="Canny edges image", type="pil")
            gallery = gr.Gallery(label="Generated Images", columns=2, rows=2, height=420)

    # Wire the button to the GPU-decorated handler; outputs map to
    # (canny_preview, gallery) in that order.
    run_btn.click(
        fn=generate_image,
        inputs=[
            input_image,
            positive_prompt,
            negative_prompt,
            guidance_scale,
            num_inference_steps,
            num_images,
            controlnet_conditioning_scale,
            resolution,
        ],
        outputs=[canny_preview, gallery],
    )


if __name__ == "__main__":
    demo.launch()