Spaces:

mariarivaille
/

Diffusion_Models_Course_Space

Running

App Files Files Community

Maria committed on Mar 7, 2025

Commit

ada0ab1

1 Parent(s): 5cbab77

hw6

Browse files

Files changed (2) hide show

app.py +42 -69
infer.py +255 -0

app.py CHANGED Viewed

@@ -1,75 +1,10 @@
 import gradio as gr
 import numpy as np
-import random
-import os
-# import spaces #[uncomment to use ZeroGPU]
-from diffusers import DiffusionPipeline
-from peft import PeftModel, LoraConfig
-import torch
-device = "cuda" if torch.cuda.is_available() else "cpu"
-if torch.cuda.is_available():
-    torch_dtype = torch.float16
-else:
-    torch_dtype = torch.float32
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 1024
-LoRA_path = 'new_model'
-# @spaces.GPU #[uncomment to use ZeroGPU]
-def infer(  # Generate one image with the selected model; returns (image, seed).
-    model_id,  # pipeline id, or the special value 'Maria_Lashina_LoRA'
-    prompt,
-    negative_prompt,
-    seed,
-    randomize_seed,  # when truthy, the supplied seed is replaced by a random one
-    width,
-    height,
-    guidance_scale,
-    num_inference_steps,
-    progress=gr.Progress(track_tqdm=True),
-):
-    if randomize_seed:
-        seed = random.randint(0, MAX_SEED)  # inclusive range [0, MAX_SEED]
-    generator = torch.Generator().manual_seed(seed)
-    if model_id == 'Maria_Lashina_LoRA':  # SD v1.4 base with adapters loaded from LoRA_path
-        adapter_name = 'a cartoonish mouse'
-        unet_sub_dir = os.path.join(LoRA_path, "unet")
-        text_encoder_sub_dir = os.path.join(LoRA_path, "text_encoder")
-        pipe = DiffusionPipeline.from_pretrained('CompVis/stable-diffusion-v1-4', torch_dtype=torch_dtype).to(device)
-        pipe.unet = PeftModel.from_pretrained(pipe.unet, unet_sub_dir, adapter_name=adapter_name)  # wrap the UNet with the adapter stored under unet/
-        pipe.text_encoder = PeftModel.from_pretrained(pipe.text_encoder, text_encoder_sub_dir, adapter_name=adapter_name)  # same for the text encoder
-        if torch_dtype == torch.float16:  # cast the wrapped modules to half precision as well
-            pipe.unet.half()
-            pipe.text_encoder.half()
-        pipe.to(device)
-    else:  # any other id is loaded directly with from_pretrained
-        pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch_dtype).to(device)
-    image = pipe(
-        prompt=prompt,
-        negative_prompt=negative_prompt,
-        guidance_scale=guidance_scale,
-        num_inference_steps=num_inference_steps,
-        width=width,
-        height=height,
-        generator=generator,
-    ).images[0]  # only the first generated image is used
-    return image, seed  # the seed is returned so the caller can display the value actually used
 examples = [
     "The image of a cartoonish mouse eating from a red bowl of yellow triangle chips, her cheeks are full. The mouse is gray with big pink ears, small white eyes and a black pointed nose. It has a simple design, the background color is white. The style of the image is reminiscent of a sticker or a digital illustration.",
     "The image of a cartoonish mouse with red hearts instead of eyes meaning that the mouse is in love with something. The mouse is gray with big pink ears and a black pointed nose. It has a simple design, the background color is white. The style of the image is reminiscent of a sticker or a digital illustration.",
@@ -83,9 +18,15 @@ css = """
 }
 """
 with gr.Blocks(css=css) as demo:
     with gr.Column(elem_id="col-container"):
-        gr.Markdown(" # Text-to-Image Gradio Template")
         MODEL_LIST = [
             "CompVis/stable-diffusion-v1-4",
@@ -116,8 +57,33 @@ with gr.Blocks(css=css) as demo:
                 label="Negative prompt",
                 max_lines=1,
                 placeholder="Enter a negative prompt",
-                visible=False,
             )
             seed = gr.Slider(
                 label="Seed",
@@ -177,9 +143,16 @@ with gr.Blocks(css=css) as demo:
             height,
             guidance_scale,
             num_inference_steps,
         ],
         outputs=[result, seed],
     )
 if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
 import numpy as np
+from infer import infer, CONTROLNET_MODE
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 1024
 examples = [
     "The image of a cartoonish mouse eating from a red bowl of yellow triangle chips, her cheeks are full. The mouse is gray with big pink ears, small white eyes and a black pointed nose. It has a simple design, the background color is white. The style of the image is reminiscent of a sticker or a digital illustration.",
     "The image of a cartoonish mouse with red hearts instead of eyes meaning that the mouse is in love with something. The mouse is gray with big pink ears and a black pointed nose. It has a simple design, the background color is white. The style of the image is reminiscent of a sticker or a digital illustration.",
 }
 """
+def on_checkbox_change(use_advanced):
+    visible = use_advanced
+    return (gr.update(visible=visible, interactive=visible),
+            gr.update(visible=visible, interactive=visible),
+            gr.update(visible=visible, interactive=visible))
 with gr.Blocks(css=css) as demo:
     with gr.Column(elem_id="col-container"):
+        gr.Markdown(" # Maria Lashina Text-to-Image Rat Stickers Generation App")
         MODEL_LIST = [
             "CompVis/stable-diffusion-v1-4",
                 label="Negative prompt",
                 max_lines=1,
                 placeholder="Enter a negative prompt",
+                visible=True,
+            )
+            use_controlnet = gr.Checkbox(label="Use ControlNet")
+            control_strength = gr.Slider(
+                label="ControlNet strength",
+                minimum=0,
+                maximum=1,
+                step=0.01,
+                value=0.8,
+                visible=False
+            )
+            controlnet_mode = gr.Dropdown(CONTROLNET_MODE.keys(), label="ControlNet mode", visible=False)
+            controlnet_image = gr.Image(label="ControlNet image", visible=False)
+            use_controlnet.change(on_checkbox_change, use_controlnet, [control_strength, controlnet_mode, controlnet_image])
+            use_ip_adapter = gr.Checkbox(label="Use IPAdapter")
+            ip_adapter_scale = gr.Slider(
+                label="IPAdapter scale",
+                minimum=0,
+                maximum=1,
+                step=0.01,
+                value=0.8,
+                visible=False
             )
+            ip_adapter_image = gr.Image(label="IPAdapter image", visible=False)
+            use_advanced_ip.change(on_checkbox_change, use_advanced_ip, [ip_adapter_scale, image_upload_ip])
             seed = gr.Slider(
                 label="Seed",
             height,
             guidance_scale,
             num_inference_steps,
+            use_controlnet,
+            controlnet_strength,
+            controlnet_mode,
+            controlnet_image,
+            use_ip_adapter,
+            ip_adapter_scale,
+            ip_adapter_image
         ],
         outputs=[result, seed],
     )
 if __name__ == "__main__":
+    demo.launch(share=False, debug=True)

infer.py ADDED Viewed

	@@ -0,0 +1,255 @@

+import numpy as np
+import torch
+import cv2 as cv
+import random
+import os
+import spaces
+import gradio as gr
+from transformers import pipeline
+from controlnet_aux import MLSDdetector, HEDdetector, NormalBaeDetector, LineartDetector
+from peft import PeftModel, LoraConfig
+from diffusers import (
+    DiffusionPipeline,
+    StableDiffusionPipeline,
+    StableDiffusionControlNetPipeline,
+    StableDiffusionControlNetImg2ImgPipeline,
+    DPMSolverMultistepScheduler,
+    PNDMScheduler,
+    ControlNetModel
+)
+from diffusers.callbacks import MultiPipelineCallbacks, PipelineCallback
+from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import rescale_noise_cfg, retrieve_timesteps
+from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput
+from diffusers.utils.torch_utils import randn_tensor
+from diffusers.utils import load_image, make_image_grid
+device = "cuda" if torch.cuda.is_available() else "cpu"
+if torch.cuda.is_available():
+    torch_dtype = torch.float16
+else:
+    torch_dtype = torch.float32
+default_model = 'CompVis/stable-diffusion-v1-4'
+LoRA_path = 'new_model'
+CONTROLNET_MODE = {
+    "Canny Edge Detection" : "lllyasviel/control_v11p_sd15_canny",
+    "Pixel to Pixel": "lllyasviel/control_v11e_sd15_ip2p",
+    "HED edge detection (soft edge)" : "lllyasviel/control_sd15_hed",
+    "Midas depth estimation" : "lllyasviel/control_v11f1p_sd15_depth",
+    "Surface Normal Estimation" : "lllyasviel/control_v11p_sd15_normalbae",
+    "Scribble-Based Generation" : "lllyasviel/control_v11p_sd15_scribble",
+    "Line Art Generation": "lllyasviel/control_v11p_sd15_lineart",
+}
+def get_pipe(
+    model_id,
+    use_controlnet,
+    controlnet_mode,
+    use_ip_adapter
+):
+    if use_controlnet and use_ip_adapter:
+        print('Pipe with ControlNet and IPAdapter')
+        controlnet = ControlNetModel.from_pretrained(
+            CONTROLNET_MODE[controlnet_mode],
+            cache_dir="./models_cache",
+            torch_dtype=torch.float16
+        )
+        pipe = StableDiffusionControlNetPipeline.from_pretrained(
+            model_id if model_id!='Maria_Lashina_LoRA' else default_model,
+            torch_dtype=torch_dtype,
+            controlnet=use_controlnet,
+            safety_checker=None,
+        ).to(device)
+        pipe.load_ip_adapter(
+            "h94/IP-Adapter",
+            subfolder="models",
+            weight_name="ip-adapter-plus_sd14.bin",
+        )
+    elif controlnet:
+        print('Pipe with ControlNet')
+        controlnet = ControlNetModel.from_pretrained(
+            CONTROLNET_MODE[controlnet_mode],
+            cache_dir="./models_cache",
+            torch_dtype=torch.float16)
+        pipe = StableDiffusionControlNetPipeline.from_pretrained(
+            model_id if model_id!='Maria_Lashina_LoRA' else default_model,
+            torch_dtype=torch_dtype,
+            controlnet=use_controlnet,
+            safety_checker=None,
+        ).to(device)
+    elif ip_adapter:
+        print('Pipe with IpAdapter')
+        pipe = StableDiffusionPipeline.from_pretrained(
+            model_id if model_id!='Maria_Lashina_LoRA' else default_model,
+            torch_dtype=torch_dtype,
+            safety_checker=None,
+        ).to(device)
+        pipe.load_ip_adapter(
+            "h94/IP-Adapter",
+            subfolder="models",
+            weight_name="ip-adapter-plus_sd14.bin")
+    else:
+        print('Pipe with only SD')
+        pipe = StableDiffusionPipeline.from_pretrained(
+            model_id if model_id!='Maria_Lashina_LoRA' else default_model,
+            torch_dtype=torch_dtype,
+            safety_checker=None,
+        ).to(device)
+    if model_id == 'Maria_Lashina_LoRA':
+        adapter_name = 'a cartoonish mouse'
+        unet_sub_dir = os.path.join(LoRA_path, "unet")
+        text_encoder_sub_dir = os.path.join(LoRA_path, "text_encoder")
+        pipe.unet = PeftModel.from_pretrained(pipe.unet, unet_sub_dir, adapter_name=adapter_name)
+        pipe.text_encoder = PeftModel.from_pretrained(pipe.text_encoder, text_encoder_sub_dir, adapter_name=adapter_name)
+        if torch_dtype == torch.float16:
+            pipe.unet.half()
+            pipe.text_encoder.half()
+    return pipe
+def prepare_controlnet_image(controlnet_image, mode):
+    if mode == "Canny Edge Detection":
+        image = cv.Canny(controlnet_image, 80, 160)
+        image = np.repeat(image[:, :, None], 3, axis=2)
+        image = Image.fromarray(image)
+    elif mode == "HED edge detection (soft edge)":
+        processor = HEDdetector.from_pretrained('lllyasviel/Annotators')
+        image = processor(controlnet_image)
+    elif mode == "Midas depth estimation":
+        depth_estimator = pipeline('depth-estimation')
+        image = depth_estimator(controlnet_image)['depth']
+        image = np.array(image)
+        image = image[:, :, None]
+        image = np.concatenate([image, image, image], axis=2)
+        image = Image.fromarray(image)
+    elif mode == "Surface Normal Estimation":
+        processor = NormalBaeDetector.from_pretrained("lllyasviel/Annotators")
+        image = processor(controlnet_image)
+    elif mode == "Scribble-Based Generation":
+        processor = HEDdetector.from_pretrained('lllyasviel/Annotators')
+        image = processor(controlnet_image, scribble=True)
+    elif mode == "Line Art Generation":
+        processor = LineartDetector.from_pretrained("lllyasviel/Annotators")
+        image = processor(controlnet_image)
+    else:
+        image = controlnet_image
+# @spaces.GPU #[uncomment to use ZeroGPU]
+def infer(
+    model_id,
+    prompt,
+    negative_prompt,
+    seed,
+    randomize_seed,
+    width,
+    height,
+    guidance_scale,
+    num_inference_steps,
+    use_controlnet,
+    controlnet_strength,
+    controlnet_mode,
+    controlnet_image,
+    use_ip_adapter,
+    ip_adapter_scale,
+    ip_adapter_image,
+    progress=gr.Progress(track_tqdm=True),
+):
+    if randomize_seed:
+        seed = random.randint(0, MAX_SEED)
+    generator = torch.Generator().manual_seed(seed)
+    if not use_controlnet and not use_ip_adapter:
+        pipe = get_pipe(model_id, use_controlnet, controlnet_mode, use_ip_adapter)
+        image = pipe(
+            prompt=prompt,
+            negative_prompt=negative_prompt,
+            guidance_scale=guidance_scale,
+            num_inference_steps=num_inference_steps,
+            width=width,
+            height=height,
+            generator=generator
+        ).images[0]
+    elif use_controlnet and not use_ip_adapter:
+        cn_image = prepare_controlnet_image(controlnet_image, controlnet_mode)
+        pipe = get_pipe(model_id, use_controlnet, controlnet_mode, use_ip_adapter)
+        image = pipe(
+            prompt,
+            cn_image,
+            negative_prompt=negative_prompt,
+            num_inference_steps = num_inference_steps,
+            controlnet_conditioning_scale=control_strength,
+            generator=generator
+        ).images[0]
+    elif not use_controlnet and use_ip_adapter:
+        pipe = get_pipe(model_id, use_controlnet, controlnet_mode, use_ip_adapter)
+        pipe.set_ip_adapter_scale(ip_adapter_scale)
+        image = pipe(
+            prompt,
+            num_inference_steps=num_inference_steps,
+            guidance_scale=guidance_scale,
+            ip_adapter_image=ip_adapter_image,
+            generator=generator
+        ).images[0]
+    elif use_controlnet and use_ip_adapter:
+        cn_image = prepare_controlnet_image(controlnet_image, controlnet_mode)
+        pipe = get_pipe(model_id, use_controlnet, controlnet_mode, use_ip_adapter)
+        pipe.set_ip_adapter_scale(ip_adapter_scale)
+        image = pipe(
+            prompt,
+            cn_image,
+            num_inference_steps=num_inference_steps,
+            guidance_scale=guidance_scale,
+            height=height,
+            width=width,
+            controlnet_conditioning_scale=control_strength,
+            ip_adapter_image=image_upload_ip,
+            generator=generator,
+        ).images[0]
+    return image, seed