Upload folder using huggingface_hub

Browse files

Files changed (16) hide show

inference.py +82 -26
internals/data/task.py +4 -0
internals/pipelines/commons.py +11 -2
internals/pipelines/controlnets.py +17 -0
internals/pipelines/inpainter.py +10 -1
internals/pipelines/object_remove.py +7 -0
internals/pipelines/replace_background.py +12 -2
internals/pipelines/safety_checker.py +3 -2
internals/pipelines/sdxl_tile_upscale.py +14 -0
internals/pipelines/upscaler.py +5 -1
internals/util/config.py +22 -1
internals/util/model_loader.py +9 -1
internals/util/prompt.py +7 -6
models/ultrasharp/model.py +6 -4
models/ultrasharp/util.py +84 -3
requirements.txt +2 -2

inference.py CHANGED Viewed

@@ -38,8 +38,9 @@ from internals.util.commons import (
 )
 from internals.util.config import (
     get_is_sdxl,
     get_model_dir,
-    num_return_sequences,
     set_configs_from_task,
     set_model_config,
     set_root_dir,
@@ -54,7 +55,7 @@ torch.backends.cuda.matmul.allow_tf32 = True
 auto_mode = False
-prompt_modifier = PromptModifier(num_of_sequences=num_return_sequences)
 upscaler = Upscaler()
 pose_detector = PoseDetector()
 inpainter = InPainter()
@@ -128,7 +129,7 @@ def canny(task: Task):
         "negative_prompt": [
             f"monochrome, neon, x-ray, negative image, oversaturated, {task.get_negative_prompt()}"
         ]
-        * num_return_sequences,
         **task.cnc_kwargs(),
         **lora_patcher.kwargs(),
     }
@@ -136,7 +137,8 @@ def canny(task: Task):
     if task.get_high_res_fix():
         kwargs = {
             "prompt": prompt,
-            "negative_prompt": [task.get_negative_prompt()] * num_return_sequences,
             "images": images,
             "width": task.get_width(),
             "height": task.get_height(),
@@ -235,13 +237,13 @@ def scribble(task: Task):
         image = ControlNet.scribble_image(image)
     kwargs = {
-        "image": [image] * num_return_sequences,
         "seed": task.get_seed(),
         "num_inference_steps": task.get_steps(),
         "width": width,
         "height": height,
         "prompt": prompt,
-        "negative_prompt": [task.get_negative_prompt()] * num_return_sequences,
         **task.cns_kwargs(),
     }
     images, has_nsfw = controlnet.process(**kwargs)
@@ -249,7 +251,8 @@ def scribble(task: Task):
     if task.get_high_res_fix():
         kwargs = {
             "prompt": prompt,
-            "negative_prompt": [task.get_negative_prompt()] * num_return_sequences,
             "images": images,
             "width": task.get_width(),
             "height": task.get_height(),
@@ -292,7 +295,7 @@ def linearart(task: Task):
         "width": width,
         "height": height,
         "prompt": prompt,
-        "negative_prompt": [task.get_negative_prompt()] * num_return_sequences,
         **task.cnl_kwargs(),
     }
     images, has_nsfw = controlnet.process(**kwargs)
@@ -300,7 +303,8 @@ def linearart(task: Task):
     if task.get_high_res_fix():
         kwargs = {
             "prompt": prompt,
-            "negative_prompt": [task.get_negative_prompt()] * num_return_sequences,
             "images": images,
             "width": task.get_width(),
             "height": task.get_height(),
@@ -342,7 +346,7 @@ def pose(task: Task, s3_outkey: str = "_pose", poses: Optional[list] = None):
         pose = download_image(task.get_imageUrl()).resize(
             (task.get_width(), task.get_height())
         )
-        poses = [pose] * num_return_sequences
     elif task.get_pose_coordinates():
         infered_pose = pose_detector.transform(
             image=task.get_imageUrl(),
@@ -350,9 +354,11 @@ def pose(task: Task, s3_outkey: str = "_pose", poses: Optional[list] = None):
             width=task.get_width(),
             height=task.get_height(),
         )
-        poses = [infered_pose] * num_return_sequences
     else:
-        poses = [controlnet.detect_pose(task.get_imageUrl())] * num_return_sequences
     if not get_is_sdxl():
         # in normal pipeline we use depth + pose controlnet
@@ -376,7 +382,7 @@ def pose(task: Task, s3_outkey: str = "_pose", poses: Optional[list] = None):
         "image": images,
         "seed": task.get_seed(),
         "num_inference_steps": task.get_steps(),
-        "negative_prompt": [task.get_negative_prompt()] * num_return_sequences,
         "width": width,
         "height": height,
         **kwargs,
@@ -388,7 +394,8 @@ def pose(task: Task, s3_outkey: str = "_pose", poses: Optional[list] = None):
     if task.get_high_res_fix():
         kwargs = {
             "prompt": prompt,
-            "negative_prompt": [task.get_negative_prompt()] * num_return_sequences,
             "images": images,
             "width": task.get_width(),
             "height": task.get_height(),
@@ -439,8 +446,11 @@ def text2img(task: Task):
     if task.get_high_res_fix():
         kwargs = {
-            "prompt": params.prompt if params.prompt else [""] * num_return_sequences,
-            "negative_prompt": [task.get_negative_prompt()] * num_return_sequences,
             "images": images,
             "width": task.get_width(),
             "height": task.get_height(),
@@ -486,7 +496,8 @@ def img2img(task: Task):
             "width": width,
             "height": height,
             "prompt": prompt,
-            "negative_prompt": [task.get_negative_prompt()] * num_return_sequences,
             **task.cnl_kwargs(),
             "adapter_conditioning_scale": 0.3,
         }
@@ -500,7 +511,8 @@ def img2img(task: Task):
         kwargs = {
             "prompt": prompt,
             "imageUrl": task.get_imageUrl(),
-            "negative_prompt": [task.get_negative_prompt()] * num_return_sequences,
             "num_inference_steps": task.get_steps(),
             "width": width,
             "height": height,
@@ -512,7 +524,8 @@ def img2img(task: Task):
     if task.get_high_res_fix():
         kwargs = {
             "prompt": prompt,
-            "negative_prompt": [task.get_negative_prompt()] * num_return_sequences,
             "images": images,
             "width": task.get_width(),
             "height": task.get_height(),
@@ -535,7 +548,12 @@ def img2img(task: Task):
 @update_db
 @slack.auto_send_alert
 def inpaint(task: Task):
-    prompt, _ = get_patched_prompt(task)
     print({"prompts": prompt})
@@ -546,13 +564,13 @@ def inpaint(task: Task):
         "width": task.get_width(),
         "height": task.get_height(),
         "seed": task.get_seed(),
-        "negative_prompt": [task.get_negative_prompt()] * num_return_sequences,
         "num_inference_steps": task.get_steps(),
         **task.ip_kwargs(),
     }
     images = inpainter.process(**kwargs)
-    generated_image_urls = upload_images(images, "_inpaint", task.get_taskId())
     clear_cuda_and_gc()
@@ -566,7 +584,7 @@ def replace_bg(task: Task):
     if task.is_prompt_engineering():
         prompt = prompt_modifier.modify(prompt)
     else:
-        prompt = [prompt] * num_return_sequences
     lora_patcher = lora_style.get_patcher(replace_background.pipe, task.get_style())
     lora_patcher.patch()
@@ -574,7 +592,7 @@ def replace_bg(task: Task):
     images, has_nsfw = replace_background.replace(
         image=task.get_imageUrl(),
         prompt=prompt,
-        negative_prompt=[task.get_negative_prompt()] * num_return_sequences,
         seed=task.get_seed(),
         width=task.get_width(),
         height=task.get_height(),
@@ -749,11 +767,13 @@ def load_model_by_task(task_type: TaskType, model_id=-1):
         inpainter.init(text2img_pipe)
         controlnet.init(text2img_pipe)
-    if task_type == TaskType.INPAINT:
         inpainter.load()
         safety_checker.apply(inpainter)
     elif task_type == TaskType.REPLACE_BG:
-        replace_background.load(base=text2img_pipe, high_res=high_res)
     elif task_type == TaskType.RT_DRAW_SEG or task_type == TaskType.RT_DRAW_IMG:
         realtime_draw.load(text2img_pipe)
     elif task_type == TaskType.OBJECT_REMOVAL:
@@ -776,6 +796,28 @@ def load_model_by_task(task_type: TaskType, model_id=-1):
             controlnet.load_model("pose")
 def apply_safety_checkers():
     safety_checker.apply(text2img_pipe)
     safety_checker.apply(img2img_pipe)
@@ -801,6 +843,18 @@ def model_fn(model_dir):
     return
 @FailureHandler.clear
 def predict_fn(data, pipe):
     task = Task(data)
@@ -851,6 +905,8 @@ def predict_fn(data, pipe):
             return tile_upscale(task)
         elif task_type == TaskType.INPAINT:
             return inpaint(task)
         elif task_type == TaskType.SCRIBBLE:
             return scribble(task)
         elif task_type == TaskType.LINEARART:

 )
 from internals.util.config import (
     get_is_sdxl,
+    get_low_gpu_mem,
     get_model_dir,
+    get_num_return_sequences,
     set_configs_from_task,
     set_model_config,
     set_root_dir,
 auto_mode = False
+prompt_modifier = PromptModifier(num_of_sequences=get_num_return_sequences())
 upscaler = Upscaler()
 pose_detector = PoseDetector()
 inpainter = InPainter()
         "negative_prompt": [
             f"monochrome, neon, x-ray, negative image, oversaturated, {task.get_negative_prompt()}"
         ]
+        * get_num_return_sequences(),
         **task.cnc_kwargs(),
         **lora_patcher.kwargs(),
     }
     if task.get_high_res_fix():
         kwargs = {
             "prompt": prompt,
+            "negative_prompt": [task.get_negative_prompt()]
+            * get_num_return_sequences(),
             "images": images,
             "width": task.get_width(),
             "height": task.get_height(),
         image = ControlNet.scribble_image(image)
     kwargs = {
+        "image": [image] * get_num_return_sequences(),
         "seed": task.get_seed(),
         "num_inference_steps": task.get_steps(),
         "width": width,
         "height": height,
         "prompt": prompt,
+        "negative_prompt": [task.get_negative_prompt()] * get_num_return_sequences(),
         **task.cns_kwargs(),
     }
     images, has_nsfw = controlnet.process(**kwargs)
     if task.get_high_res_fix():
         kwargs = {
             "prompt": prompt,
+            "negative_prompt": [task.get_negative_prompt()]
+            * get_num_return_sequences(),
             "images": images,
             "width": task.get_width(),
             "height": task.get_height(),
         "width": width,
         "height": height,
         "prompt": prompt,
+        "negative_prompt": [task.get_negative_prompt()] * get_num_return_sequences(),
         **task.cnl_kwargs(),
     }
     images, has_nsfw = controlnet.process(**kwargs)
     if task.get_high_res_fix():
         kwargs = {
             "prompt": prompt,
+            "negative_prompt": [task.get_negative_prompt()]
+            * get_num_return_sequences(),
             "images": images,
             "width": task.get_width(),
             "height": task.get_height(),
         pose = download_image(task.get_imageUrl()).resize(
             (task.get_width(), task.get_height())
         )
+        poses = [pose] * get_num_return_sequences()
     elif task.get_pose_coordinates():
         infered_pose = pose_detector.transform(
             image=task.get_imageUrl(),
             width=task.get_width(),
             height=task.get_height(),
         )
+        poses = [infered_pose] * get_num_return_sequences()
     else:
+        poses = [
+            controlnet.detect_pose(task.get_imageUrl())
+        ] * get_num_return_sequences()
     if not get_is_sdxl():
         # in normal pipeline we use depth + pose controlnet
         "image": images,
         "seed": task.get_seed(),
         "num_inference_steps": task.get_steps(),
+        "negative_prompt": [task.get_negative_prompt()] * get_num_return_sequences(),
         "width": width,
         "height": height,
         **kwargs,
     if task.get_high_res_fix():
         kwargs = {
             "prompt": prompt,
+            "negative_prompt": [task.get_negative_prompt()]
+            * get_num_return_sequences(),
             "images": images,
             "width": task.get_width(),
             "height": task.get_height(),
     if task.get_high_res_fix():
         kwargs = {
+            "prompt": params.prompt
+            if params.prompt
+            else [""] * get_num_return_sequences(),
+            "negative_prompt": [task.get_negative_prompt()]
+            * get_num_return_sequences(),
             "images": images,
             "width": task.get_width(),
             "height": task.get_height(),
             "width": width,
             "height": height,
             "prompt": prompt,
+            "negative_prompt": [task.get_negative_prompt()]
+            * get_num_return_sequences(),
             **task.cnl_kwargs(),
             "adapter_conditioning_scale": 0.3,
         }
         kwargs = {
             "prompt": prompt,
             "imageUrl": task.get_imageUrl(),
+            "negative_prompt": [task.get_negative_prompt()]
+            * get_num_return_sequences(),
             "num_inference_steps": task.get_steps(),
             "width": width,
             "height": height,
     if task.get_high_res_fix():
         kwargs = {
             "prompt": prompt,
+            "negative_prompt": [task.get_negative_prompt()]
+            * get_num_return_sequences(),
             "images": images,
             "width": task.get_width(),
             "height": task.get_height(),
 @update_db
 @slack.auto_send_alert
 def inpaint(task: Task):
+    # OUTPAINT shares this handler with INPAINT; only the upload key and the
+    # prompt source differ (img2text on the task image vs. the patched prompt).
+    if task.get_type() == TaskType.OUTPAINT:
+        key = "_outpaint"
+        # Use the config getter: the bare `num_return_sequences` name is no
+        # longer imported into this module, so referencing it raises NameError.
+        prompt = [img2text.process(task.get_imageUrl())] * get_num_return_sequences()
+    else:
+        key = "_inpaint"
+        prompt, _ = get_patched_prompt(task)
     print({"prompts": prompt})
         "width": task.get_width(),
         "height": task.get_height(),
         "seed": task.get_seed(),
+        "negative_prompt": [task.get_negative_prompt()] * get_num_return_sequences(),
         "num_inference_steps": task.get_steps(),
         **task.ip_kwargs(),
     }
     images = inpainter.process(**kwargs)
+    generated_image_urls = upload_images(images, key, task.get_taskId())
     clear_cuda_and_gc()
     if task.is_prompt_engineering():
         prompt = prompt_modifier.modify(prompt)
     else:
+        prompt = [prompt] * get_num_return_sequences()
     lora_patcher = lora_style.get_patcher(replace_background.pipe, task.get_style())
     lora_patcher.patch()
     images, has_nsfw = replace_background.replace(
         image=task.get_imageUrl(),
         prompt=prompt,
+        negative_prompt=[task.get_negative_prompt()] * get_num_return_sequences(),
         seed=task.get_seed(),
         width=task.get_width(),
         height=task.get_height(),
         inpainter.init(text2img_pipe)
         controlnet.init(text2img_pipe)
+    if task_type == TaskType.INPAINT or task_type == TaskType.OUTPAINT:
         inpainter.load()
         safety_checker.apply(inpainter)
     elif task_type == TaskType.REPLACE_BG:
+        replace_background.load(
+            upscaler=upscaler, base=text2img_pipe, high_res=high_res
+        )
     elif task_type == TaskType.RT_DRAW_SEG or task_type == TaskType.RT_DRAW_IMG:
         realtime_draw.load(text2img_pipe)
     elif task_type == TaskType.OBJECT_REMOVAL:
             controlnet.load_model("pose")
+def unload_model_by_task(task_type: TaskType):
+    "Calls unload() on the pipeline object associated with task_type; other task types are ignored."
+    controlnet_tasks = (
+        TaskType.CANNY,
+        TaskType.SCRIBBLE,
+        TaskType.LINEARART,
+        TaskType.POSE,
+    )
+    if task_type in (TaskType.INPAINT, TaskType.OUTPAINT):
+        inpainter.unload()
+    elif task_type == TaskType.REPLACE_BG:
+        replace_background.unload()
+    elif task_type == TaskType.OBJECT_REMOVAL:
+        object_removal.unload()
+    elif task_type == TaskType.TILE_UPSCALE:
+        # SDXL uses the dedicated tile upscaler; otherwise the controlnet is unloaded.
+        tile_pipeline = sdxl_tileupscaler if get_is_sdxl() else controlnet
+        tile_pipeline.unload()
+    elif task_type in controlnet_tasks:
+        controlnet.unload()
 def apply_safety_checkers():
     safety_checker.apply(text2img_pipe)
     safety_checker.apply(img2img_pipe)
     return
+def auto_unload_task(func):
+    "Wraps a handler so that, when get_low_gpu_mem() is truthy, the pipeline for its task is unloaded after it returns."
+    from functools import wraps  # local import keeps this block self-contained
+    @wraps(func)  # preserve the wrapped handler's __name__/__doc__
+    def wrapper(*args, **kwargs):
+        result = func(*args, **kwargs)
+        if get_low_gpu_mem():
+            # The payload is the first positional argument, or `data` when the
+            # handler was called by keyword (predict_fn's parameter name).
+            data = args[0] if args else kwargs.get("data")
+            if data is not None:
+                task = Task(data)
+                unload_model_by_task(task.get_type())  # pyright: ignore
+        # The handler's own result is always passed through unchanged.
+        return result
+    return wrapper
+@auto_unload_task
 @FailureHandler.clear
 def predict_fn(data, pipe):
     task = Task(data)
             return tile_upscale(task)
         elif task_type == TaskType.INPAINT:
             return inpaint(task)
+        elif task_type == TaskType.OUTPAINT:
+            return inpaint(task)
         elif task_type == TaskType.SCRIBBLE:
             return scribble(task)
         elif task_type == TaskType.LINEARART:

internals/data/task.py CHANGED Viewed

@@ -23,6 +23,7 @@ class TaskType(Enum):
     PRELOAD_MODEL = "PRELOAD_MODEL"
     CUSTOM_ACTION = "CUSTOM_ACTION"
     SYSTEM_CMD = "SYSTEM_CMD"
 class ModelType(Enum):
@@ -140,6 +141,9 @@ class Task:
     def get_nsfw_threshold(self) -> float:
         return self.__data.get("nsfw_threshold", 0.03)
     def can_access_nsfw(self) -> bool:
         return self.__data.get("can_access_nsfw", False)

     PRELOAD_MODEL = "PRELOAD_MODEL"
     CUSTOM_ACTION = "CUSTOM_ACTION"
     SYSTEM_CMD = "SYSTEM_CMD"
+    OUTPAINT = "OUTPAINT"
 class ModelType(Enum):
     def get_nsfw_threshold(self) -> float:
         return self.__data.get("nsfw_threshold", 0.03)
+    def get_num_return_sequences(self) -> int:
+        "Returns the payload's `num_return_sequences` entry, or 4 when the key is absent."
+        requested = self.__data.get("num_return_sequences", 4)
+        return requested
     def can_access_nsfw(self) -> bool:
         return self.__data.get("can_access_nsfw", False)

internals/pipelines/commons.py CHANGED Viewed

@@ -12,7 +12,12 @@ from diffusers import (
 from internals.data.result import Result
 from internals.pipelines.twoStepPipeline import two_step_pipeline
 from internals.util.commons import disable_safety_checker, download_image
-from internals.util.config import get_hf_token, get_is_sdxl, num_return_sequences
 class AbstractPipeline:
@@ -41,6 +46,7 @@ class Text2Img(AbstractPipeline):
                 torch_dtype=torch.float16,
                 use_auth_token=get_hf_token(),
                 use_safetensors=True,
             )
             pipe.vae = vae
             pipe.to("cuda")
@@ -104,18 +110,20 @@ class Text2Img(AbstractPipeline):
                 print("Warning: Two step pipeline is not supported on SDXL")
                 kwargs = {
                     "prompt": modified_prompt,
                 }
             else:
                 kwargs = {
                     "prompt": prompt,
                     "modified_prompts": modified_prompt,
                     "iteration": iteration,
                 }
             kwargs = {
                 "height": height,
                 "width": width,
-                "negative_prompt": [negative_prompt or ""] * num_return_sequences,
                 "num_inference_steps": num_inference_steps,
                 **kwargs,
             }
@@ -136,6 +144,7 @@ class Img2Img(AbstractPipeline):
                 model_dir,
                 torch_dtype=torch.float16,
                 use_auth_token=get_hf_token(),
                 use_safetensors=True,
             ).to("cuda")
         else:

 from internals.data.result import Result
 from internals.pipelines.twoStepPipeline import two_step_pipeline
 from internals.util.commons import disable_safety_checker, download_image
+from internals.util.config import (
+    get_base_model_variant,
+    get_hf_token,
+    get_is_sdxl,
+    get_num_return_sequences,
+    num_return_sequences,
+)
 class AbstractPipeline:
                 torch_dtype=torch.float16,
                 use_auth_token=get_hf_token(),
                 use_safetensors=True,
+                variant=get_base_model_variant(),
             )
             pipe.vae = vae
             pipe.to("cuda")
                 print("Warning: Two step pipeline is not supported on SDXL")
                 kwargs = {
                     "prompt": modified_prompt,
+                    **kwargs,
                 }
             else:
                 kwargs = {
                     "prompt": prompt,
                     "modified_prompts": modified_prompt,
                     "iteration": iteration,
+                    **kwargs,
                 }
             kwargs = {
                 "height": height,
                 "width": width,
+                "negative_prompt": [negative_prompt or ""] * get_num_return_sequences(),
                 "num_inference_steps": num_inference_steps,
                 **kwargs,
             }
                 model_dir,
                 torch_dtype=torch.float16,
                 use_auth_token=get_hf_token(),
+                variant=get_base_model_variant(),
                 use_safetensors=True,
             ).to("cuda")
         else:

internals/pipelines/controlnets.py CHANGED Viewed

@@ -126,6 +126,23 @@ class ControlNet(AbstractPipeline):
     def init(self, pipeline: AbstractPipeline):
         setattr(self, "__pipeline", pipeline)
     def load_model(self, task_name: CONTROLNET_TYPES):
         "Appropriately loads the network module, pipelines and cache it for reuse."

     def init(self, pipeline: AbstractPipeline):
         setattr(self, "__pipeline", pipeline)
+    def unload(self):
+        "Drops the cached pipe/pipe2 attributes and resets the loader state; does nothing when not loaded."
+        if self.__loaded:
+            # Reset the loaded flag, pipe type and current task name together.
+            self.__current_task_name = ""
+            self.__pipe_type = None
+            self.__loaded = False
+            for attr in ("pipe", "pipe2"):
+                if hasattr(self, attr):
+                    delattr(self, attr)
+            clear_cuda_and_gc()
     def load_model(self, task_name: CONTROLNET_TYPES):
         "Appropriately loads the network module, pipelines and cache it for reuse."

internals/pipelines/inpainter.py CHANGED Viewed

@@ -4,12 +4,14 @@ import torch
 from diffusers import StableDiffusionInpaintPipeline, StableDiffusionXLInpaintPipeline
 from internals.pipelines.commons import AbstractPipeline
 from internals.util.commons import disable_safety_checker, download_image
 from internals.util.config import (
     get_hf_cache_dir,
     get_hf_token,
-    get_is_sdxl,
     get_inpaint_model_path,
     get_model_dir,
 )
@@ -35,6 +37,7 @@ class InPainter(AbstractPipeline):
                 torch_dtype=torch.float16,
                 cache_dir=get_hf_cache_dir(),
                 use_auth_token=get_hf_token(),
             ).to("cuda")
         else:
             self.pipe = StableDiffusionInpaintPipeline.from_pretrained(
@@ -69,6 +72,11 @@ class InPainter(AbstractPipeline):
             self.pipe.enable_vae_slicing()
         self.pipe.enable_xformers_memory_efficient_attention()
     @torch.inference_mode()
     def process(
         self,
@@ -95,6 +103,7 @@ class InPainter(AbstractPipeline):
             "width": width,
             "negative_prompt": negative_prompt,
             "num_inference_steps": num_inference_steps,
             **kwargs,
         }
         return self.pipe.__call__(**kwargs).images

 from diffusers import StableDiffusionInpaintPipeline, StableDiffusionXLInpaintPipeline
 from internals.pipelines.commons import AbstractPipeline
+from internals.util.cache import clear_cuda_and_gc
 from internals.util.commons import disable_safety_checker, download_image
 from internals.util.config import (
+    get_base_inpaint_model_variant,
     get_hf_cache_dir,
     get_hf_token,
     get_inpaint_model_path,
+    get_is_sdxl,
     get_model_dir,
 )
                 torch_dtype=torch.float16,
                 cache_dir=get_hf_cache_dir(),
                 use_auth_token=get_hf_token(),
+                variant=get_base_inpaint_model_variant(),
             ).to("cuda")
         else:
             self.pipe = StableDiffusionInpaintPipeline.from_pretrained(
             self.pipe.enable_vae_slicing()
         self.pipe.enable_xformers_memory_efficient_attention()
+    def unload(self):
+        "Drops the pipeline reference, marks this instance as not loaded, then calls clear_cuda_and_gc()."
+        self.pipe = None
+        self.__loaded = False
+        clear_cuda_and_gc()
     @torch.inference_mode()
     def process(
         self,
             "width": width,
             "negative_prompt": negative_prompt,
             "num_inference_steps": num_inference_steps,
+            "strength": 1.0,
             **kwargs,
         }
         return self.pipe.__call__(**kwargs).images

internals/pipelines/object_remove.py CHANGED Viewed

@@ -10,6 +10,7 @@ from omegaconf import OmegaConf
 from PIL import Image
 from torch.utils.data._utils.collate import default_collate
 from internals.util.commons import download_file, download_image
 from internals.util.config import get_root_dir
 from saicinpainting.evaluation.utils import move_to_device
@@ -42,6 +43,12 @@ class ObjectRemoval:
         self.__loaded = True
     @torch.no_grad()
     def process(
         self,

 from PIL import Image
 from torch.utils.data._utils.collate import default_collate
+from internals.util.cache import clear_cuda_and_gc
 from internals.util.commons import download_file, download_image
 from internals.util.config import get_root_dir
 from saicinpainting.evaluation.utils import move_to_device
         self.__loaded = True
+    def unload(self):
+        "Drops the model reference, marks this instance as not loaded, then calls clear_cuda_and_gc()."
+        self.model = None
+        self.__loaded = False
+        clear_cuda_and_gc()
     @torch.no_grad()
     def process(
         self,

internals/pipelines/replace_background.py CHANGED Viewed

@@ -6,21 +6,22 @@ from cv2 import inpaint
 from diffusers import (
     ControlNetModel,
     StableDiffusionControlNetInpaintPipeline,
-    StableDiffusionInpaintPipeline,
     StableDiffusionControlNetPipeline,
     UniPCMultistepScheduler,
 )
 from PIL import Image, ImageFilter, ImageOps
-from internals.data.task import ModelType
 import internals.util.image as ImageUtil
 from internals.data.result import Result
 from internals.pipelines.commons import AbstractPipeline
 from internals.pipelines.controlnets import ControlNet
 from internals.pipelines.high_res import HighRes
 from internals.pipelines.inpainter import InPainter
 from internals.pipelines.remove_background import RemoveBackgroundV2
 from internals.pipelines.upscaler import Upscaler
 from internals.util.commons import download_image
 from internals.util.config import (
     get_hf_cache_dir,
@@ -82,6 +83,15 @@ class ReplaceBackground(AbstractPipeline):
         self.__loaded = True
     @torch.inference_mode()
     def replace(
         self,

 from diffusers import (
     ControlNetModel,
     StableDiffusionControlNetInpaintPipeline,
     StableDiffusionControlNetPipeline,
+    StableDiffusionInpaintPipeline,
     UniPCMultistepScheduler,
 )
 from PIL import Image, ImageFilter, ImageOps
 import internals.util.image as ImageUtil
 from internals.data.result import Result
+from internals.data.task import ModelType
 from internals.pipelines.commons import AbstractPipeline
 from internals.pipelines.controlnets import ControlNet
 from internals.pipelines.high_res import HighRes
 from internals.pipelines.inpainter import InPainter
 from internals.pipelines.remove_background import RemoveBackgroundV2
 from internals.pipelines.upscaler import Upscaler
+from internals.util.cache import clear_cuda_and_gc
 from internals.util.commons import download_image
 from internals.util.config import (
     get_hf_cache_dir,
         self.__loaded = True
+    def unload(self):
+        "Clears every sub-pipeline reference held by this instance, marks it as not loaded, then calls clear_cuda_and_gc()."
+        for attr in ("pipe", "high_res", "upscaler", "remove_background"):
+            setattr(self, attr, None)
+        self.__loaded = False
+        clear_cuda_and_gc()
     @torch.inference_mode()
     def replace(
         self,

internals/pipelines/safety_checker.py CHANGED Viewed

@@ -31,9 +31,10 @@ class SafetyChecker:
         self.__loaded = True
     def apply(self, pipeline: AbstractPipeline):
-        self.load()
         model = self.model if not get_nsfw_access() else None
         if not pipeline:
             return
         if hasattr(pipeline, "pipe"):

         self.__loaded = True
     def apply(self, pipeline: AbstractPipeline):
         model = self.model if not get_nsfw_access() else None
+        if model:
+            self.load()
         if not pipeline:
             return
         if hasattr(pipeline, "pipe"):

internals/pipelines/sdxl_tile_upscale.py CHANGED Viewed

@@ -10,6 +10,7 @@ from internals.pipelines.commons import AbstractPipeline, Text2Img
 from internals.pipelines.controlnets import ControlNet
 from internals.pipelines.demofusion_sdxl import DemoFusionSDXLControlNetPipeline
 from internals.pipelines.high_res import HighRes
 from internals.util.commons import download_image
 from internals.util.config import get_base_dimension
@@ -17,7 +18,11 @@ controlnet = ControlNet()
 class SDXLTileUpscaler(AbstractPipeline):
     def create(self, high_res: HighRes, pipeline: Text2Img, model_id: int):
         # temporal hack for upscale model till multicontrolnet support is added
         model = (
             "thibaud/controlnet-openpose-sdxl-1.0"
@@ -38,6 +43,15 @@ class SDXLTileUpscaler(AbstractPipeline):
         self.pipe = pipe
     def process(
         self,
         prompt: str,

 from internals.pipelines.controlnets import ControlNet
 from internals.pipelines.demofusion_sdxl import DemoFusionSDXLControlNetPipeline
 from internals.pipelines.high_res import HighRes
+from internals.util.cache import clear_cuda_and_gc
 from internals.util.commons import download_image
 from internals.util.config import get_base_dimension
 class SDXLTileUpscaler(AbstractPipeline):
+    __loaded = False
     def create(self, high_res: HighRes, pipeline: Text2Img, model_id: int):
+        if self.__loaded:
+            return
         # temporal hack for upscale model till multicontrolnet support is added
         model = (
             "thibaud/controlnet-openpose-sdxl-1.0"
         self.pipe = pipe
+        self.__loaded = True
+    def unload(self):
+        "Clears the pipe and high_res references, marks this instance as not loaded, then calls clear_cuda_and_gc()."
+        for attr in ("pipe", "high_res"):
+            setattr(self, attr, None)
+        self.__loaded = False
+        clear_cuda_and_gc()
     def process(
         self,
         prompt: str,

internals/pipelines/upscaler.py CHANGED Viewed

@@ -139,7 +139,11 @@ class Upscaler:
         os.chdir(str(Path.home() / ".cache"))
         if scale == 4:
             print("Using 4x-Ultrasharp")
-            upsampler = Ultrasharp(self.__model_path_4x_ultrasharp)
         else:
             print("Using RealESRGANer")
             upsampler = RealESRGANer(

         os.chdir(str(Path.home() / ".cache"))
         if scale == 4:
             print("Using 4x-Ultrasharp")
+            upsampler = Ultrasharp(
+                model_path=self.__model_path_4x_ultrasharp,
+                tile=320,
+                tile_pad=10,
+            )
         else:
             print("Using RealESRGANer")
             upsampler = RealESRGANer(

internals/util/config.py CHANGED Viewed

@@ -45,7 +45,7 @@ def set_model_config(config: ModelConfig):
 def set_configs_from_task(task: Task):
-    global env, nsfw_threshold, nsfw_access, access_token, base_dimension
     name = task.get_queue_name()
     if name.startswith("gamma"):
         env = "gamma"
@@ -55,6 +55,7 @@ def set_configs_from_task(task: Task):
     nsfw_access = task.can_access_nsfw()
     access_token = task.get_access_token()
     base_dimension = task.get_base_dimension()
 def get_model_dir():
@@ -84,6 +85,11 @@ def get_root_dir():
     return root_dir
 def get_environment():
     global env
     return env
@@ -104,6 +110,21 @@ def get_hf_token():
     return hf_token
 def api_headers():
     return {
         "Access-Token": access_token,

 def set_configs_from_task(task: Task):
+    global env, nsfw_threshold, nsfw_access, access_token, base_dimension, num_return_sequences
     name = task.get_queue_name()
     if name.startswith("gamma"):
         env = "gamma"
     nsfw_access = task.can_access_nsfw()
     access_token = task.get_access_token()
     base_dimension = task.get_base_dimension()
+    num_return_sequences = task.get_num_return_sequences()
 def get_model_dir():
     return root_dir
+def get_num_return_sequences():
+    """Return the current module-level ``num_return_sequences`` setting.
+
+    The value is refreshed per task by ``set_configs_from_task``.
+    """
+    # A plain read of a module-level name needs no ``global`` declaration.
+    return num_return_sequences
 def get_environment():
     global env
     return env
     return hf_token
+def get_low_gpu_mem():
+    """Return the ``low_gpu_mem`` flag of the module-level ``model_config``."""
+    # Read-only access, so no ``global`` statement is required.
+    return model_config.low_gpu_mem  # pyright: ignore
+def get_base_model_variant():
+    """Return the ``base_model_variant`` field of the module-level ``model_config``.
+
+    ``ModelConfig`` declares ``base_model_variant`` (an optional string) and no
+    ``get_base_model_variant`` attribute, so the field is read directly.
+    """
+    global model_config
+    return model_config.base_model_variant  # pyright: ignore
+def get_base_inpaint_model_variant():
+    """Return the ``base_inpaint_model_variant`` field of the module-level ``model_config``."""
+    # Read-only access, so no ``global`` statement is required.
+    return model_config.base_inpaint_model_variant  # pyright: ignore
 def api_headers():
     return {
         "Access-Token": access_token,

internals/util/model_loader.py CHANGED Viewed

@@ -16,6 +16,9 @@ class ModelConfig:
     base_inpaint_model_path: str
     is_sdxl: bool = False
     base_dimension: int = 512
 def load_model_from_config(path):
@@ -24,14 +27,19 @@ def load_model_from_config(path):
         with open(path + "/inference.json", "r") as f:
             config = json.loads(f.read())
             model_path = config.get("model_path", path)
-            inpaint_model_path = config.get("inpaint_model_path", path)
             is_sdxl = config.get("is_sdxl", False)
             base_dimension = config.get("base_dimension", 512)
             m_config.base_model_path = model_path
             m_config.base_inpaint_model_path = inpaint_model_path
             m_config.is_sdxl = is_sdxl
             m_config.base_dimension = base_dimension
             #
             # if config.get("model_type") == "huggingface":

     base_inpaint_model_path: str
     is_sdxl: bool = False
     base_dimension: int = 512
+    low_gpu_mem: bool = False
+    base_model_variant: Optional[str] = None
+    base_inpaint_model_variant: Optional[str] = None
 def load_model_from_config(path):
         with open(path + "/inference.json", "r") as f:
             config = json.loads(f.read())
             model_path = config.get("model_path", path)
+            inpaint_model_path = config.get("inpaint_model_path", model_path)
             is_sdxl = config.get("is_sdxl", False)
             base_dimension = config.get("base_dimension", 512)
+            base_model_variant = config.get("base_model_variant", None)
+            base_inpaint_model_variant = config.get("base_inpaint_model_variant", None)
             m_config.base_model_path = model_path
             m_config.base_inpaint_model_path = inpaint_model_path
             m_config.is_sdxl = is_sdxl
             m_config.base_dimension = base_dimension
+            m_config.low_gpu_mem = config.get("low_gpu_mem", False)
+            m_config.base_model_variant = base_model_variant
+            m_config.base_inpaint_model_variant = base_inpaint_model_variant
             #
             # if config.get("model_type") == "huggingface":

internals/util/prompt.py CHANGED Viewed

@@ -7,7 +7,7 @@ from internals.pipelines.img_to_text import Image2Text
 from internals.pipelines.prompt_modifier import PromptModifier
 from internals.util.anomaly import remove_colors
 from internals.util.avatar import Avatar
-from internals.util.config import num_return_sequences
 from internals.util.lora_style import LoraStyle
@@ -29,9 +29,9 @@ def get_patched_prompt(
     if task.is_prompt_engineering():
         prompt = prompt_modifier.modify(prompt)
     else:
-        prompt = [prompt] * num_return_sequences
-    ori_prompt = [task.get_prompt()] * num_return_sequences
     class_name = None
     add_style_and_character(ori_prompt, class_name)
@@ -60,7 +60,7 @@ def get_patched_prompt_text2img(
         if task.is_prompt_engineering():
             mod_prompt = prompt_modifier.modify(task.get_prompt())
         else:
-            mod_prompt = [task.get_prompt()] * num_return_sequences
         prompt, prompt_left, prompt_right = [], [], []
         for i in range(len(mod_prompt)):
@@ -82,11 +82,12 @@ def get_patched_prompt_text2img(
         if task.is_prompt_engineering():
             mod_prompt = prompt_modifier.modify(task.get_prompt())
         else:
-            mod_prompt = [task.get_prompt()] * num_return_sequences
         mod_prompt = [add_style_and_character(mp) for mp in mod_prompt]
         params = Text2Img.Params(
-            prompt=[add_style_and_character(task.get_prompt())] * num_return_sequences,
             modified_prompt=mod_prompt,
         )

 from internals.pipelines.prompt_modifier import PromptModifier
 from internals.util.anomaly import remove_colors
 from internals.util.avatar import Avatar
+from internals.util.config import get_num_return_sequences
 from internals.util.lora_style import LoraStyle
     if task.is_prompt_engineering():
         prompt = prompt_modifier.modify(prompt)
     else:
+        prompt = [prompt] * get_num_return_sequences()
+    ori_prompt = [task.get_prompt()] * get_num_return_sequences()
     class_name = None
     add_style_and_character(ori_prompt, class_name)
         if task.is_prompt_engineering():
             mod_prompt = prompt_modifier.modify(task.get_prompt())
         else:
+            mod_prompt = [task.get_prompt()] * get_num_return_sequences()
         prompt, prompt_left, prompt_right = [], [], []
         for i in range(len(mod_prompt)):
         if task.is_prompt_engineering():
             mod_prompt = prompt_modifier.modify(task.get_prompt())
         else:
+            mod_prompt = [task.get_prompt()] * get_num_return_sequences()
         mod_prompt = [add_style_and_character(mp) for mp in mod_prompt]
         params = Text2Img.Params(
+            prompt=[add_style_and_character(task.get_prompt())]
+            * get_num_return_sequences(),
             modified_prompt=mod_prompt,
         )

models/ultrasharp/model.py CHANGED Viewed

@@ -3,12 +3,14 @@ from typing import List
 import torch
 import models.ultrasharp.arch as arch
-from models.ultrasharp.util import infer_params, upscale_without_tiling
 class Ultrasharp:
-    def __init__(self, filename):
-        self.filename = filename
     def enhance(self, img, outscale=4):
         state_dict = torch.load(self.filename, map_location="cpu")
@@ -23,5 +25,5 @@ class Ultrasharp:
         model.to("cuda")
-        img = upscale_without_tiling(model, img)
         return img, None

 import torch
 import models.ultrasharp.arch as arch
+from models.ultrasharp.util import infer_params, upscale
 class Ultrasharp:
+    def __init__(self, model_path, tile_pad=0, tile=0):
+        """Remember where the weights live and how ``enhance`` should tile.
+
+        Args:
+            model_path: Path of the checkpoint ``enhance`` loads via ``torch.load``.
+            tile_pad: Padding value handed to ``upscale`` by ``enhance``.
+            tile: Tile size handed to ``upscale`` by ``enhance``.
+                NOTE(review): the default 0 is forwarded unchanged - confirm
+                the tiling helper accepts a zero tile size.
+        """
+        self.tile = tile
+        self.tile_pad = tile_pad
+        self.filename = model_path
     def enhance(self, img, outscale=4):
         state_dict = torch.load(self.filename, map_location="cpu")
         model.to("cuda")
+        img = upscale(model, img, self.tile_pad, self.tile)
         return img, None

models/ultrasharp/util.py CHANGED Viewed

@@ -1,3 +1,5 @@
 import numpy as np
 import torch
@@ -32,14 +34,93 @@ def infer_params(state_dict):
     return in_nc, out_nc, nf, nb, plus, scale
-def upscale_without_tiling(model, img):
     img = np.array(img)
     img = img[:, :, ::-1]
     img = np.ascontiguousarray(np.transpose(img, (2, 0, 1))) / 255
     img = torch.from_numpy(img).float()
     img = img.unsqueeze(0).to("cuda")
-    with torch.no_grad():
-        output = model(img)
     output = output.squeeze().float().cpu().clamp_(0, 1).numpy()
     output = 255.0 * np.moveaxis(output, 0, 2)
     output = output.astype(np.uint8)

+import math
 import numpy as np
 import torch
     return in_nc, out_nc, nf, nb, plus, scale
+def tile_process(model, img, tile_pad, tile_size, scale=4):
+    """Upscale ``img`` tile by tile and merge the results into one image.
+
+    The input is cropped into tiles of at most ``tile_size`` x ``tile_size``
+    pixels. Every tile is extended by up to ``tile_pad`` pixels of surrounding
+    context (clamped to the image border), passed through ``model``, and only
+    the un-padded centre of the result is copied into the output.
+    Modified from: https://github.com/ata4/esrgan-launcher
+
+    Args:
+        model: Callable applied to each padded tile; its output is expected to
+            be the tile enlarged by ``scale`` in height and width.
+        img: Tensor unpacked as ``(batch, channel, height, width)``.
+        tile_pad: Context pixels added on each side of a tile.
+        tile_size: Tile edge length in input pixels. A value ``<= 0`` disables
+            tiling: the whole image is processed as a single tile.
+        scale: Upscaling factor of ``model``.
+
+    Returns:
+        Tensor of shape ``(batch, channel, height * scale, width * scale)``
+        allocated with ``img.new_zeros``.
+
+    Raises:
+        RuntimeError: If ``model`` raises ``RuntimeError`` for a tile. The
+            original error is chained as the cause.
+    """
+    batch, channel, height, width = img.shape
+    # start with black image
+    output = img.new_zeros((batch, channel, height * scale, width * scale))
+    if tile_size <= 0:
+        # A non-positive size would divide by zero below; fall back to one
+        # tile spanning the whole image.
+        tile_size = max(height, width, 1)
+    tiles_x = math.ceil(width / tile_size)
+    tiles_y = math.ceil(height / tile_size)
+    total_tiles = tiles_x * tiles_y
+    # loop over all tiles
+    for y in range(tiles_y):
+        for x in range(tiles_x):
+            # input tile area on total image
+            input_start_x = x * tile_size
+            input_end_x = min(input_start_x + tile_size, width)
+            input_start_y = y * tile_size
+            input_end_y = min(input_start_y + tile_size, height)
+            # input tile area on total image with padding
+            input_start_x_pad = max(input_start_x - tile_pad, 0)
+            input_end_x_pad = min(input_end_x + tile_pad, width)
+            input_start_y_pad = max(input_start_y - tile_pad, 0)
+            input_end_y_pad = min(input_end_y + tile_pad, height)
+            # input tile dimensions
+            input_tile_width = input_end_x - input_start_x
+            input_tile_height = input_end_y - input_start_y
+            tile_idx = y * tiles_x + x + 1
+            input_tile = img[
+                :,
+                :,
+                input_start_y_pad:input_end_y_pad,
+                input_start_x_pad:input_end_x_pad,
+            ]
+            # upscale tile
+            try:
+                with torch.no_grad():
+                    output_tile = model(input_tile)
+            except RuntimeError as error:
+                # Carrying on would either hit an unbound ``output_tile`` or
+                # paste the previous tile's pixels here, so fail loudly.
+                raise RuntimeError(
+                    f"Upscaling failed on tile {tile_idx}/{total_tiles}"
+                ) from error
+            print(f"\tTile {tile_idx}/{total_tiles}")
+            # output tile area on total image
+            output_start_x = input_start_x * scale
+            output_end_x = input_end_x * scale
+            output_start_y = input_start_y * scale
+            output_end_y = input_end_y * scale
+            # output tile area without padding
+            output_start_x_tile = (input_start_x - input_start_x_pad) * scale
+            output_end_x_tile = output_start_x_tile + input_tile_width * scale
+            output_start_y_tile = (input_start_y - input_start_y_pad) * scale
+            output_end_y_tile = output_start_y_tile + input_tile_height * scale
+            # put tile into output image
+            output[
+                :, :, output_start_y:output_end_y, output_start_x:output_end_x
+            ] = output_tile[
+                :,
+                :,
+                output_start_y_tile:output_end_y_tile,
+                output_start_x_tile:output_end_x_tile,
+            ]
+    return output
+def upscale(model, img, tile_pad, tile_size):
     img = np.array(img)
     img = img[:, :, ::-1]
     img = np.ascontiguousarray(np.transpose(img, (2, 0, 1))) / 255
     img = torch.from_numpy(img).float()
     img = img.unsqueeze(0).to("cuda")
+    output = tile_process(model, img, tile_pad, tile_size, scale=4)
     output = output.squeeze().float().cpu().clamp_(0, 1).numpy()
     output = 255.0 * np.moveaxis(output, 0, 2)
     output = output.astype(np.uint8)

requirements.txt CHANGED Viewed

@@ -1,11 +1,11 @@
 boto3==1.24.61
 triton==2.0.0
-diffusers==0.23.0
 fastapi==0.87.0
 Pillow==9.3.0
 redis==4.3.4
 requests==2.28.1
-transformers==4.34.1
 rembg==2.0.30
 gfpgan==1.3.8
 rembg==2.0.30

 boto3==1.24.61
 triton==2.0.0
+diffusers==0.25.0
 fastapi==0.87.0
 Pillow==9.3.0
 redis==4.3.4
 requests==2.28.1
+transformers==4.36.2
 rembg==2.0.30
 gfpgan==1.3.8
 rembg==2.0.30