Upload folder using huggingface_hub

Browse files

Files changed (5) hide show

inference.py +27 -7
inference2.py +4 -1
internals/pipelines/controlnets.py +265 -113
internals/pipelines/upscaler.py +1 -1
requirements.txt +2 -2

inference.py CHANGED Viewed

@@ -22,6 +22,7 @@ from internals.util.avatar import Avatar
 from internals.util.cache import auto_clear_cuda_and_gc, clear_cuda, clear_cuda_and_gc
 from internals.util.commons import download_image, upload_image, upload_images
 from internals.util.config import (
     get_model_dir,
     num_return_sequences,
     set_configs_from_task,
@@ -185,8 +186,15 @@ def scribble(task: Task):
     )
     lora_patcher.patch()
     kwargs = {
-        "imageUrl": task.get_imageUrl(),
         "seed": task.get_seed(),
         "num_inference_steps": task.get_steps(),
         "width": width,
@@ -305,19 +313,32 @@ def pose(task: Task, s3_outkey: str = "_pose", poses: Optional[list] = None):
     else:
         poses = [controlnet.detect_pose(task.get_imageUrl())] * num_return_sequences
-    depth = download_image(task.get_auxilary_imageUrl()).resize(
-        (task.get_width(), task.get_height())
-    )
-    depth = ControlNet.depth_image(depth)
     kwargs = {
         "prompt": prompt,
-        "image": [depth, poses[0]],
         "seed": task.get_seed(),
         "num_inference_steps": task.get_steps(),
         "negative_prompt": [task.get_negative_prompt()] * num_return_sequences,
         "width": width,
         "height": height,
         **task.cnp_kwargs(),
         **lora_patcher.kwargs(),
     }
@@ -336,7 +357,6 @@ def pose(task: Task, s3_outkey: str = "_pose", poses: Optional[list] = None):
         images, _ = high_res.apply(**kwargs)
     upload_image(poses[0], "crecoAI/{}_pose.png".format(task.get_taskId()))
-    upload_image(depth, "crecoAI/{}_depth.png".format(task.get_taskId()))
     generated_image_urls = upload_images(images, s3_outkey, task.get_taskId())

 from internals.util.cache import auto_clear_cuda_and_gc, clear_cuda, clear_cuda_and_gc
 from internals.util.commons import download_image, upload_image, upload_images
 from internals.util.config import (
+    get_is_sdxl,
     get_model_dir,
     num_return_sequences,
     set_configs_from_task,
     )
     lora_patcher.patch()
+    image = download_image(task.get_imageUrl()).resize((width, height))
+    if get_is_sdxl():
+        # We use sketch in SDXL
+        image = ControlNet.pidinet_image(image)
+    else:
+        image = ControlNet.scribble_image(image)
     kwargs = {
+        "image": [image] * num_return_sequences,
         "seed": task.get_seed(),
         "num_inference_steps": task.get_steps(),
         "width": width,
     else:
         poses = [controlnet.detect_pose(task.get_imageUrl())] * num_return_sequences
+    if not get_is_sdxl():
+        # in normal pipeline we use depth + pose controlnet
+        depth = download_image(task.get_auxilary_imageUrl()).resize(
+            (task.get_width(), task.get_height())
+        )
+        depth = ControlNet.depth_image(depth)
+        images = [depth, poses[0]]
+        upload_image(depth, "crecoAI/{}_depth.png".format(task.get_taskId()))
+        kwargs = {
+            "control_guidance_end": [0.5, 1.0],
+        }
+    else:
+        images = poses[0]
+        kwargs = {}
     kwargs = {
         "prompt": prompt,
+        "image": images,
         "seed": task.get_seed(),
         "num_inference_steps": task.get_steps(),
         "negative_prompt": [task.get_negative_prompt()] * num_return_sequences,
         "width": width,
         "height": height,
+        **kwargs,
         **task.cnp_kwargs(),
         **lora_patcher.kwargs(),
     }
         images, _ = high_res.apply(**kwargs)
     upload_image(poses[0], "crecoAI/{}_pose.png".format(task.get_taskId()))
     generated_image_urls = upload_images(images, s3_outkey, task.get_taskId())

inference2.py CHANGED Viewed

@@ -18,7 +18,7 @@ from internals.pipelines.replace_background import ReplaceBackground
 from internals.pipelines.safety_checker import SafetyChecker
 from internals.pipelines.upscaler import Upscaler
 from internals.util.avatar import Avatar
-from internals.util.cache import auto_clear_cuda_and_gc, clear_cuda
 from internals.util.commons import construct_default_s3_url, upload_image, upload_images
 from internals.util.config import (
     num_return_sequences,
@@ -218,6 +218,9 @@ def upscale_image(task: Task):
         )
     upload_image(BytesIO(out_img), output_key)
     return {"generated_image_url": construct_default_s3_url(output_key)}

 from internals.pipelines.safety_checker import SafetyChecker
 from internals.pipelines.upscaler import Upscaler
 from internals.util.avatar import Avatar
+from internals.util.cache import auto_clear_cuda_and_gc, clear_cuda, clear_cuda_and_gc
 from internals.util.commons import construct_default_s3_url, upload_image, upload_images
 from internals.util.config import (
     num_return_sequences,
         )
     upload_image(BytesIO(out_img), output_key)
+    clear_cuda_and_gc()
     return {"generated_image_url": construct_default_s3_url(output_key)}

internals/pipelines/controlnets.py CHANGED Viewed

@@ -1,19 +1,26 @@
-from typing import List, Literal, Union
 import cv2
 import numpy as np
 import torch
-from controlnet_aux import HEDdetector, LineartDetector, OpenposeDetector
 from diffusers import (
     ControlNetModel,
     DiffusionPipeline,
     StableDiffusionControlNetPipeline,
     StableDiffusionXLControlNetPipeline,
     UniPCMultistepScheduler,
 )
-from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_controlnet import (
-    MultiControlNetModel,
-)
 from PIL import Image
 from pydash import has
 from torch.nn import Linear
@@ -24,9 +31,6 @@ import internals.util.image as ImageUtil
 from external.midas import apply_midas
 from internals.data.result import Result
 from internals.pipelines.commons import AbstractPipeline
-from internals.pipelines.tileUpscalePipeline import (
-    StableDiffusionControlNetImg2ImgPipeline,
-)
 from internals.util.cache import clear_cuda_and_gc
 from internals.util.commons import download_image
 from internals.util.config import (
@@ -39,16 +43,91 @@ from internals.util.config import (
 CONTROLNET_TYPES = Literal["pose", "canny", "scribble", "linearart", "tile_upscaler"]
 class ControlNet(AbstractPipeline):
     __current_task_name = ""
     __loaded = False
-    __pipeline: AbstractPipeline
     def init(self, pipeline: AbstractPipeline):
-        self.__pipeline = pipeline
     def load_model(self, task_name: CONTROLNET_TYPES):
         config = self.__model_sdxl if get_is_sdxl() else self.__model_normal
         if self.__current_task_name == task_name:
             return
@@ -59,92 +138,116 @@ class ControlNet(AbstractPipeline):
             task_name = model  # pyright: ignore
             model = config[task_name]
-        # Multi controlnet
         if "," in model:
-            model_names = [m.strip() for m in model.split(",")]
-            controlnets = []
-            for name in model_names:
-                cn = ControlNetModel.from_pretrained(
-                    name,
-                    torch_dtype=torch.float16,
-                    cache_dir=get_hf_cache_dir(),
-                ).to("cuda")
-                controlnets.append(cn)
-            controlnet = MultiControlNetModel(controlnets).to("cuda")
-        # Single controlnet
-        else:
-            controlnet = ControlNetModel.from_pretrained(
-                model,
-                torch_dtype=torch.float16,
-                cache_dir=get_hf_cache_dir(),
-            ).to("cuda")
-        self.__current_task_name = task_name
-        self.controlnet = controlnet
-        self.__load()
-        if hasattr(self, "pipe"):
-            self.pipe.controlnet = controlnet
-        if hasattr(self, "pipe2"):
-            self.pipe2.controlnet = controlnet
         clear_cuda_and_gc()
-    def __load(self):
-        "Should not be called externally"
-        if self.__loaded:
-            return
-        if not hasattr(self, "controlnet"):
-            self.load_model("pose")
-        # controlnet pipeline for tile upscaler
-        if get_is_sdxl():
-            print("Warning: Tile upscale is not supported on SDXL")
-            if self.__pipeline:
-                pipe = StableDiffusionXLControlNetPipeline(
-                    controlnet=self.controlnet, **self.__pipeline.pipe.components
-                ).to("cuda")
             else:
-                pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
-                    get_model_dir(),
-                    controlnet=self.controlnet,
-                    torch_dtype=torch.float16,
-                    use_auth_token=get_hf_token(),
-                    cache_dir=get_hf_cache_dir(),
-                    use_safetensors=True,
-                ).to("cuda")
-            pipe.enable_vae_tiling()
-            pipe.enable_vae_slicing()
-            pipe.enable_xformers_memory_efficient_attention()
-            self.pipe2 = pipe
-        else:
-            if hasattr(self, "__pipeline"):
-                pipe = StableDiffusionControlNetImg2ImgPipeline(
-                    controlnet=self.controlnet, **self.__pipeline.pipe.components
-                ).to("cuda")
-            else:
-                pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
-                    get_model_dir(),
-                    controlnet=self.controlnet,
-                    torch_dtype=torch.float16,
-                    use_auth_token=get_hf_token(),
-                    cache_dir=get_hf_cache_dir(),
-                ).to("cuda")
-            # pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
-            pipe.enable_model_cpu_offload()
-            pipe.enable_xformers_memory_efficient_attention()
-            self.pipe = pipe
             # controlnet pipeline for canny and pose
-            pipe2 = StableDiffusionControlNetPipeline(**pipe.components).to("cuda")
-            pipe2.scheduler = UniPCMultistepScheduler.from_config(
-                pipe2.scheduler.config
             )
-            pipe2.enable_xformers_memory_efficient_attention()
-            self.pipe2 = pipe2
-        self.__loaded = True
     def process(self, **kwargs):
         if self.__current_task_name == "pose":
@@ -220,7 +323,6 @@ class ControlNet(AbstractPipeline):
             "num_inference_steps": num_inference_steps,
             "negative_prompt": negative_prompt[0],
             "guidance_scale": guidance_scale,
-            "control_guidance_end": [0.5, 1.0],
             "height": height,
             "width": width,
             **kwargs,
@@ -256,7 +358,7 @@ class ControlNet(AbstractPipeline):
         kwargs = {
             "image": condition_image,
             "prompt": prompt,
-            "controlnet_conditioning_image": condition_image,
             "num_inference_steps": num_inference_steps,
             "negative_prompt": negative_prompt,
             "height": condition_image.size[1],
@@ -270,7 +372,7 @@ class ControlNet(AbstractPipeline):
     @torch.inference_mode()
     def process_scribble(
         self,
-        imageUrl: Union[str, Image.Image],
         prompt: Union[str, List[str]],
         negative_prompt: Union[str, List[str]],
         num_inference_steps: int,
@@ -285,21 +387,25 @@ class ControlNet(AbstractPipeline):
         torch.manual_seed(seed)
-        if isinstance(imageUrl, Image.Image):
-            init_image = imageUrl.resize((width, height))
-        else:
-            init_image = download_image(imageUrl).resize((width, height))
-        condition_image = self.__scribble_condition_image(init_image)
         kwargs = {
-            "image": condition_image,
             "prompt": prompt,
             "num_inference_steps": num_inference_steps,
             "negative_prompt": negative_prompt,
             "height": height,
             "width": width,
             "guidance_scale": guidance_scale,
             **kwargs,
         }
         result = self.pipe2.__call__(**kwargs)
@@ -326,29 +432,35 @@ class ControlNet(AbstractPipeline):
         init_image = download_image(imageUrl).resize((width, height))
         condition_image = ControlNet.linearart_condition_image(init_image)
         kwargs = {
-            "image": condition_image,
             "prompt": prompt,
             "num_inference_steps": num_inference_steps,
             "negative_prompt": negative_prompt,
             "height": height,
             "width": width,
             "guidance_scale": guidance_scale,
             **kwargs,
         }
         result = self.pipe2.__call__(**kwargs)
         return Result.from_result(result)
     def cleanup(self):
-        if hasattr(self, "pipe") and hasattr(self.pipe, "controlnet"):
-            del self.pipe.controlnet
-        if hasattr(self, "pipe2") and hasattr(self.pipe2, "controlnet"):
-            del self.pipe2.controlnet
-        if hasattr(self, "controlnet"):
-            del self.controlnet
-        self.__current_task_name = ""
-        clear_cuda_and_gc()
     def detect_pose(self, imageUrl: str) -> Image.Image:
         detector = OpenposeDetector.from_pretrained("lllyasviel/ControlNet")
@@ -356,7 +468,8 @@ class ControlNet(AbstractPipeline):
         image = detector.__call__(image)
         return image
-    def __scribble_condition_image(self, image: Image.Image) -> Image.Image:
         processor = HEDdetector.from_pretrained("lllyasviel/Annotators")
         image = processor.__call__(input_image=image, scribble=True)
         return image
@@ -369,12 +482,36 @@ class ControlNet(AbstractPipeline):
     @staticmethod
     def depth_image(image: Image.Image) -> Image.Image:
-        depth = np.array(image)
-        depth = ImageUtil.HWC3(depth)
-        depth, _ = apply_midas(depth)
-        depth = ImageUtil.HWC3(depth)
-        depth = Image.fromarray(depth)
-        return depth
     @staticmethod
     def canny_detect_edge(image: Image.Image) -> Image.Image:
@@ -407,10 +544,25 @@ class ControlNet(AbstractPipeline):
         "scribble": "lllyasviel/control_v11p_sd15_scribble",
         "tile_upscaler": "lllyasviel/control_v11f1e_sd15_tile",
     }
     __model_sdxl = {
         "pose": "thibaud/controlnet-openpose-sdxl-1.0",
         "canny": "diffusers/controlnet-canny-sdxl-1.0",
-        "linearart": "canny",
-        "scribble": "canny",
         "tile_upscaler": None,
     }

+from typing import AbstractSet, List, Literal, Optional, Union
 import cv2
 import numpy as np
 import torch
+from controlnet_aux import (
+    HEDdetector,
+    LineartDetector,
+    OpenposeDetector,
+    PidiNetDetector,
+)
 from diffusers import (
     ControlNetModel,
     DiffusionPipeline,
+    StableDiffusionAdapterPipeline,
+    StableDiffusionControlNetImg2ImgPipeline,
     StableDiffusionControlNetPipeline,
+    StableDiffusionXLAdapterPipeline,
     StableDiffusionXLControlNetPipeline,
+    T2IAdapter,
     UniPCMultistepScheduler,
 )
+from diffusers.pipelines.controlnet import MultiControlNetModel
 from PIL import Image
 from pydash import has
 from torch.nn import Linear
 from external.midas import apply_midas
 from internals.data.result import Result
 from internals.pipelines.commons import AbstractPipeline
 from internals.util.cache import clear_cuda_and_gc
 from internals.util.commons import download_image
 from internals.util.config import (
 CONTROLNET_TYPES = Literal["pose", "canny", "scribble", "linearart", "tile_upscaler"]
class StableDiffusionNetworkModelPipelineLoader:
    """Factory for the diffusers pipeline that hosts a network module.

    A "network module" is either a ControlNet (``pipeline_type="controlnet"``)
    or a T2I adapter (``pipeline_type="t2i"``). Calling this class does not
    create an instance of it: ``__new__`` returns the constructed pipeline
    moved to CUDA, or ``None`` when the requested combination is unsupported.
    Unsupported combinations never raise; a warning is printed instead.

    Args:
        is_sdxl: Pick the SDXL pipeline classes instead of the SD ones.
        is_img2img: Request the img2img ControlNet pipeline (used for tile
            upscaling). Unsupported together with ``is_sdxl``; has no effect
            for T2I adapters, which always get the adapter pipeline.
        network_model: The loaded ControlNet / T2I adapter; handed to the
            pipeline as ``controlnet=`` or ``adapter=`` respectively.
        pipeline_type: ``"controlnet"`` or ``"t2i"``.
        base_pipe: Optional wrapper exposing ``.pipe.components``. When given,
            those components are reused to build the pipeline; otherwise the
            weights are loaded with ``from_pretrained`` from ``get_model_dir()``.

    Returns:
        The pipeline on CUDA, or ``None`` for an unsupported configuration.
    """

    def __new__(
        cls,
        is_sdxl,
        is_img2img,
        network_model,
        pipeline_type,
        base_pipe: Optional["AbstractPipeline"] = None,
    ):
        if is_sdxl and is_img2img:
            # Does not matter pipeline type but tile upscale is not supported
            print("Warning: Tile upscale is not supported on SDXL")
            return None

        # Reject unknown module kinds up front, before any model directory /
        # token lookup is attempted.
        if pipeline_type not in ("controlnet", "t2i"):
            print(
                f"Warning: Unsupported configuration {is_sdxl=}, {is_img2img=}, {pipeline_type=}"
            )
            return None

        # Choose the pipeline class and the keyword under which the network
        # module is passed to it.
        if pipeline_type == "controlnet":
            module_kwarg = "controlnet"
            if is_sdxl:
                pipeline_cls = StableDiffusionXLControlNetPipeline
            elif is_img2img:
                pipeline_cls = StableDiffusionControlNetImg2ImgPipeline
            else:
                pipeline_cls = StableDiffusionControlNetPipeline
        else:
            module_kwarg = "adapter"
            if is_sdxl:
                pipeline_cls = StableDiffusionXLAdapterPipeline
            else:
                pipeline_cls = StableDiffusionAdapterPipeline

        module_kwargs = {module_kwarg: network_model}

        if base_pipe is None:
            # No base pipeline to borrow from: load the weights from disk/hub.
            pipeline = pipeline_cls.from_pretrained(
                pretrained_model_name_or_path=get_model_dir(),
                torch_dtype=torch.float16,
                use_auth_token=get_hf_token(),
                cache_dir=get_hf_cache_dir(),
                **module_kwargs,
            )
        else:
            # Reuse the already-loaded components of the base pipeline.
            pipeline = pipeline_cls(
                **module_kwargs,
                **base_pipe.pipe.components,  # pyright: ignore
            )
        return pipeline.to("cuda")
 class ControlNet(AbstractPipeline):
     __current_task_name = ""
     __loaded = False
+    __pipe_type = None
     def init(self, pipeline: AbstractPipeline):
+        setattr(self, "__pipeline", pipeline)
     def load_model(self, task_name: CONTROLNET_TYPES):
+        "Appropriately loads the network module, pipelines and cache it for reuse."
         config = self.__model_sdxl if get_is_sdxl() else self.__model_normal
         if self.__current_task_name == task_name:
             return
             task_name = model  # pyright: ignore
             model = config[task_name]
+        pipeline_type = (
+            self.__model_sdxl_types[task_name]
+            if get_is_sdxl()
+            else self.__model_normal_types[task_name]
+        )
         if "," in model:
+            model = [m.strip() for m in model.split(",")]
+        model = self.__load_network_model(model, pipeline_type)
+        self.__load_pipeline(model, pipeline_type)
+        self.network_model = model
+        self.__current_task_name = task_name
         clear_cuda_and_gc()
+    def __load_network_model(self, model_name, pipeline_type):
+        "Loads the network module, eg: ControlNet or T2I Adapters"
+        def load_controlnet(model):
+            return ControlNetModel.from_pretrained(
+                model,
+                torch_dtype=torch.float16,
+                cache_dir=get_hf_cache_dir(),
+            ).to("cuda")
+        def load_t2i(model):
+            return T2IAdapter.from_pretrained(
+                model,
+                torch_dtype=torch.float16,
+                varient="fp16",
+            ).to("cuda")
+        if type(model_name) == str:
+            if pipeline_type == "controlnet":
+                return load_controlnet(model_name)
+            if pipeline_type == "t2i":
+                return load_t2i(model_name)
+            raise Exception("Invalid pipeline type")
+        elif type(model_name) == list:
+            if pipeline_type == "controlnet":
+                cns = []
+                for model in model_name:
+                    cns.append(load_controlnet(model))
+                return MultiControlNetModel(cns).to("cuda")
+            elif pipeline_type == "t2i":
+                raise Exception("Multi T2I adapters are not supported")
+            raise Exception("Invalid pipeline type")
+    def __load_pipeline(self, network_model, pipeline_type):
+        "Load the base pipeline(s) (if not loaded already) based on pipeline type and attaches the network module to the pipeline"
+        def patch_pipe(pipe):
+            if not pipe:
+                # cases where the loader may return None
+                return None
+            if get_is_sdxl():
+                pipe.enable_vae_tiling()
+                pipe.enable_vae_slicing()
+                pipe.enable_xformers_memory_efficient_attention()
             else:
+                pipe.enable_xformers_memory_efficient_attention()
+            return pipe
+        # If the pipeline type is changed we should reload all
+        # the pipelines
+        if not self.__loaded or self.__pipe_type != pipeline_type:
+            # controlnet pipeline for tile upscaler
+            pipe = StableDiffusionNetworkModelPipelineLoader(
+                is_sdxl=get_is_sdxl(),
+                is_img2img=True,
+                network_model=network_model,
+                pipeline_type=pipeline_type,
+                base_pipe=getattr(self, "__pipeline", None),
+            )
+            pipe = patch_pipe(pipe)
+            if pipe:
+                self.pipe = pipe
             # controlnet pipeline for canny and pose
+            pipe2 = StableDiffusionNetworkModelPipelineLoader(
+                is_sdxl=get_is_sdxl(),
+                is_img2img=False,
+                network_model=network_model,
+                pipeline_type=pipeline_type,
+                base_pipe=getattr(self, "__pipeline", None),
             )
+            pipe2 = patch_pipe(pipe2)
+            if pipe2:
+                self.pipe2 = pipe2
+            self.__loaded = True
+            self.__pipe_type = pipeline_type
+        # Set the network module in the pipeline
+        if pipeline_type == "controlnet":
+            if hasattr(self, "pipe"):
+                setattr(self.pipe, "controlnet", network_model)
+            if hasattr(self, "pipe2"):
+                setattr(self.pipe2, "controlnet", network_model)
+        elif pipeline_type == "t2i":
+            if hasattr(self, "pipe"):
+                setattr(self.pipe, "adapter", network_model)
+            if hasattr(self, "pipe2"):
+                setattr(self.pipe2, "adapter", network_model)
+        clear_cuda_and_gc()
     def process(self, **kwargs):
         if self.__current_task_name == "pose":
             "num_inference_steps": num_inference_steps,
             "negative_prompt": negative_prompt[0],
             "guidance_scale": guidance_scale,
             "height": height,
             "width": width,
             **kwargs,
         kwargs = {
             "image": condition_image,
             "prompt": prompt,
+            "control_image": condition_image,
             "num_inference_steps": num_inference_steps,
             "negative_prompt": negative_prompt,
             "height": condition_image.size[1],
     @torch.inference_mode()
     def process_scribble(
         self,
+        image: List[Image.Image],
         prompt: Union[str, List[str]],
         negative_prompt: Union[str, List[str]],
         num_inference_steps: int,
         torch.manual_seed(seed)
+        sdxl_args = (
+            {
+                "guidance_scale": 6,
+                "adapter_conditioning_scale": 0.6,
+                "adapter_conditioning_factor": 1.0,
+            }
+            if get_is_sdxl()
+            else {}
+        )
         kwargs = {
+            "image": image,
             "prompt": prompt,
             "num_inference_steps": num_inference_steps,
             "negative_prompt": negative_prompt,
             "height": height,
             "width": width,
             "guidance_scale": guidance_scale,
+            **sdxl_args,
             **kwargs,
         }
         result = self.pipe2.__call__(**kwargs)
         init_image = download_image(imageUrl).resize((width, height))
         condition_image = ControlNet.linearart_condition_image(init_image)
+        # we use t2i adapter and the conditioning scale should always be 0.8
+        sdxl_args = (
+            {
+                "guidance_scale": 6,
+                "adapter_conditioning_scale": 0.5,
+                "adapter_conditioning_factor": 0.9,
+            }
+            if get_is_sdxl()
+            else {}
+        )
         kwargs = {
+            "image": [condition_image] * 4,
             "prompt": prompt,
             "num_inference_steps": num_inference_steps,
             "negative_prompt": negative_prompt,
             "height": height,
             "width": width,
             "guidance_scale": guidance_scale,
+            **sdxl_args,
             **kwargs,
         }
         result = self.pipe2.__call__(**kwargs)
         return Result.from_result(result)
     def cleanup(self):
+        """Doesn't do anything considering new diffusers has itself a cleanup mechanism
+        after controlnet generation"""
+        pass
     def detect_pose(self, imageUrl: str) -> Image.Image:
         detector = OpenposeDetector.from_pretrained("lllyasviel/ControlNet")
         image = detector.__call__(image)
         return image
+    @staticmethod
+    def scribble_image(image: Image.Image) -> Image.Image:
         processor = HEDdetector.from_pretrained("lllyasviel/Annotators")
         image = processor.__call__(input_image=image, scribble=True)
         return image
     @staticmethod
     def depth_image(image: Image.Image) -> Image.Image:
+        global midas, midas_transforms
+        if "midas" not in globals():
+            midas = torch.hub.load("intel-isl/MiDaS", "MiDaS").to("cuda")
+            midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")
+        transform = midas_transforms.default_transform
+        cv_image = np.array(image)
+        img = cv2.cvtColor(cv_image, cv2.COLOR_BGR2RGB)
+        input_batch = transform(img).to("cuda")
+        with torch.no_grad():
+            prediction = midas(input_batch)
+            prediction = torch.nn.functional.interpolate(
+                prediction.unsqueeze(1),
+                size=img.shape[:2],
+                mode="bicubic",
+                align_corners=False,
+            ).squeeze()
+        output = prediction.cpu().numpy()
+        formatted = (output * 255 / np.max(output)).astype("uint8")
+        img = Image.fromarray(formatted)
+        return img
+    @staticmethod
+    def pidinet_image(image: Image.Image) -> Image.Image:
+        pidinet = PidiNetDetector.from_pretrained("lllyasviel/Annotators").to("cuda")
+        image = pidinet.__call__(input_image=image, apply_filter=True)
+        return image
     @staticmethod
     def canny_detect_edge(image: Image.Image) -> Image.Image:
         "scribble": "lllyasviel/control_v11p_sd15_scribble",
         "tile_upscaler": "lllyasviel/control_v11f1e_sd15_tile",
     }
    # Kind of network module per task for the non-SDXL models; load_model uses
    # it to choose between the ControlNet and T2I-adapter loaders. Every task
    # is a ControlNet here.
    __model_normal_types = {
        "pose": "controlnet",
        "canny": "controlnet",
        "linearart": "controlnet",
        "scribble": "controlnet",
        "tile_upscaler": "controlnet",
    }
    # Model id per task when running SDXL. "linearart" and "scribble" point at
    # T2I adapters rather than ControlNets (see __model_sdxl_types below).
    # "tile_upscaler" has no model: the pipeline loader reports tile upscale
    # as unsupported on SDXL.
    __model_sdxl = {
        "pose": "thibaud/controlnet-openpose-sdxl-1.0",
        "canny": "diffusers/controlnet-canny-sdxl-1.0",
        "linearart": "TencentARC/t2i-adapter-lineart-sdxl-1.0",
        "scribble": "TencentARC/t2i-adapter-sketch-sdxl-1.0",
        "tile_upscaler": None,
    }
    # Kind of network module per task for SDXL; keys mirror __model_sdxl.
    # NOTE(review): a None entry is not a valid pipeline type for the loader --
    # confirm "tile_upscaler" is never requested under SDXL.
    __model_sdxl_types = {
        "pose": "controlnet",
        "canny": "controlnet",
        "linearart": "t2i",
        "scribble": "t2i",
        "tile_upscaler": None,
    }

internals/pipelines/upscaler.py CHANGED Viewed

@@ -148,7 +148,7 @@ class Upscaler:
                 model=model,
                 half=False,
                 gpu_id="0",
-                tile=320,
                 tile_pad=10,
                 pre_pad=0,
             )

                 model=model,
                 half=False,
                 gpu_id="0",
+                tile=128,
                 tile_pad=10,
                 pre_pad=0,
             )

requirements.txt CHANGED Viewed

@@ -1,6 +1,6 @@
 boto3==1.24.61
 triton==2.0.0
-diffusers==0.19.0
 fastapi==0.87.0
 Pillow==9.3.0
 redis==4.3.4
@@ -9,7 +9,7 @@ transformers==4.34.1
 rembg==2.0.30
 gfpgan==1.3.8
 rembg==2.0.30
-controlnet-aux==0.0.5
 gfpgan>=1.3.4
 realesrgan==0.3.0
 compel==1.0.4

 boto3==1.24.61
 triton==2.0.0
+diffusers==0.23.0
 fastapi==0.87.0
 Pillow==9.3.0
 redis==4.3.4
 rembg==2.0.30
 gfpgan==1.3.8
 rembg==2.0.30
+controlnet-aux==0.0.7
 gfpgan>=1.3.4
 realesrgan==0.3.0
 compel==1.0.4