interior-ai-designer / codebase.md
Bobby
inpainting test
b85b7f4

A newer version of the Gradio SDK is available: 6.2.0

Upgrade

preprocess.py

import PIL.Image
import torch, gc
from controlnet_aux_local import NormalBaeDetector#, CannyDetector

class Preprocessor:
    """Lazy loader and callable wrapper around a ControlNet annotator model.

    Only the ``"NormalBae"`` annotator is supported. The detector is fetched
    from ``MODEL_ID`` and moved to CUDA the first time :meth:`load` is called
    with that name.
    """

    MODEL_ID = "lllyasviel/Annotators"

    def __init__(self):
        self.model = None  # detector instance; stays None until load() succeeds
        self.name = ""  # name of the loaded annotator ("" means nothing loaded)

    def load(self, name: str) -> None:
        """Load the annotator called ``name`` unless it is already active.

        Args:
            name: Annotator identifier; only ``"NormalBae"`` is recognised.

        Raises:
            ValueError: If ``name`` is not a supported annotator.
        """
        if name == self.name:
            return
        if name != "NormalBae":
            raise ValueError(
                f"Unsupported preprocessor {name!r}; expected 'NormalBae'"
            )
        print("Loading NormalBae")
        self.model = NormalBaeDetector.from_pretrained(self.MODEL_ID).to("cuda")
        torch.cuda.empty_cache()
        self.name = name

    def __call__(self, image: PIL.Image.Image, **kwargs) -> PIL.Image.Image:
        """Run the loaded annotator on ``image``, forwarding ``kwargs`` to it.

        Raises:
            RuntimeError: If no annotator has been loaded yet.
        """
        if self.model is None:
            # Fail with a clear message instead of "'NoneType' is not callable".
            raise RuntimeError("No preprocessor loaded; call load() first")
        return self.model(image, **kwargs)

app.py

# Deployment switch: when True the app is launched on localhost with an
# explicit port (8081); otherwise the default launch is used and `port`
# stays at 8080.
prod = False
# Controls whether the "Advanced options" accordion is visible and open.
show_options = False
port = 8081 if prod else 8080

import os
import random
import time
import gradio as gr
import numpy as np
import spaces
import imageio
from huggingface_hub import HfApi
import gc
import torch
from PIL import Image
from diffusers import (
    ControlNetModel,
    DPMSolverMultistepScheduler,
    StableDiffusionControlNetPipeline,
    # AutoencoderKL,
)
from controlnet_aux_local import NormalBaeDetector

# Upper bound for randomly drawn seeds: the largest value of a signed 32-bit int.
MAX_SEED = np.iinfo(np.int32).max
# Token passed to the Hub upload calls; None when the env var is unset.
API_KEY = os.environ.get("API_KEY", None)
# os.environ['HF_HOME'] = '/data/.huggingface'

print("CUDA version:", torch.version.cuda)
print("loading everything")
# NOTE(review): `compiled` is not read anywhere in the visible code — confirm before removing.
compiled = False
# Hub client used by process_image to upload the input and output images.
api = HfApi()

class Preprocessor:
    """Lazy loader and callable wrapper around a ControlNet annotator model.

    Only the ``"NormalBae"`` annotator is supported. The detector is fetched
    from ``MODEL_ID`` and moved to CUDA the first time :meth:`load` is called
    with that name.
    """

    MODEL_ID = "lllyasviel/Annotators"

    def __init__(self):
        self.model = None  # detector instance; stays None until load() succeeds
        self.name = ""  # name of the loaded annotator ("" means nothing loaded)

    def load(self, name: str) -> None:
        """Load the annotator called ``name`` unless it is already active.

        Args:
            name: Annotator identifier; only ``"NormalBae"`` is recognised.

        Raises:
            ValueError: If ``name`` is not a supported annotator.
        """
        if name == self.name:
            return
        if name != "NormalBae":
            raise ValueError(
                f"Unsupported preprocessor {name!r}; expected 'NormalBae'"
            )
        print("Loading NormalBae")
        self.model = NormalBaeDetector.from_pretrained(self.MODEL_ID).to("cuda")
        torch.cuda.empty_cache()
        self.name = name

    def __call__(self, image: Image.Image, **kwargs) -> Image.Image:
        """Run the loaded annotator on ``image``, forwarding ``kwargs`` to it.

        Raises:
            RuntimeError: If no annotator has been loaded yet.
        """
        if self.model is None:
            # Fail with a clear message instead of "'NoneType' is not callable".
            raise RuntimeError("No preprocessor loaded; call load() first")
        return self.model(image, **kwargs)

# One-time model setup: ControlNet, scheduler, Stable Diffusion pipeline and the
# NormalBae preprocessor are all created here and moved to CUDA.
# NOTE(review): the gr.NO_RELOAD guard presumably keeps this from re-running when
# Gradio hot-reloads the script — confirm against the Gradio docs.
if gr.NO_RELOAD:
    # Controlnet Normal
    model_id = "lllyasviel/control_v11p_sd15_normalbae"
    print("initializing controlnet")
    # NOTE(review): confirm ControlNetModel.from_pretrained honours
    # `attn_implementation`; it may be silently ignored.
    controlnet = ControlNetModel.from_pretrained(
        model_id,
        torch_dtype=torch.float16,
        attn_implementation="flash_attention_2",
    ).to("cuda")

    # Scheduler
    # NOTE(review): `device_map` and `torch_dtype` are passed to a scheduler here —
    # verify they have any effect; same for `denoise_final`.
    scheduler = DPMSolverMultistepScheduler.from_pretrained(
        "runwayml/stable-diffusion-v1-5",
        solver_order=2,
        subfolder="scheduler",
        use_karras_sigmas=True,
        final_sigmas_type="sigma_min",
        algorithm_type="sde-dpmsolver++",
        prediction_type="epsilon",
        thresholding=False,
        denoise_final=True,
        device_map="cuda",
        torch_dtype=torch.float16,
    )

    # Stable Diffusion Pipeline URL
    # base_model_url = "https://huggingface.co/broyang/hentaidigitalart_v20/blob/main/realcartoon3d_v15.safetensors"
    base_model_url = "https://huggingface.co/Lykon/AbsoluteReality/blob/main/AbsoluteReality_1.8.1_pruned.safetensors"
    # vae_url = "https://huggingface.co/stabilityai/sd-vae-ft-mse-original/blob/main/vae-ft-mse-840000-ema-pruned.safetensors"

    # print('loading vae')
    # vae = AutoencoderKL.from_single_file(vae_url, torch_dtype=torch.float16).to("cuda")
    # vae.to(memory_format=torch.channels_last) 

    # Build the pipeline from a single checkpoint file, with the safety checker
    # disabled and the ControlNet/scheduler created above plugged in.
    print('loading pipe')
    pipe = StableDiffusionControlNetPipeline.from_single_file(
        base_model_url,
        safety_checker=None,
        controlnet=controlnet,
        scheduler=scheduler,
        # vae=vae,
        torch_dtype=torch.float16,
    ).to("cuda")

    print("loading preprocessor")
    preprocessor = Preprocessor()
    preprocessor.load("NormalBae")
    # pipe.load_textual_inversion("broyang/hentaidigitalart_v20", weight_name="EasyNegativeV2.safetensors", token="EasyNegativeV2",)
    # pipe.load_textual_inversion("broyang/hentaidigitalart_v20", weight_name="badhandv4.pt", token="badhandv4")
    # pipe.load_textual_inversion("broyang/hentaidigitalart_v20", weight_name="fcNeg-neg.pt", token="fcNeg-neg")
    # pipe.load_textual_inversion("broyang/hentaidigitalart_v20", weight_name="HDA_Ahegao.pt", token="HDA_Ahegao")
    # pipe.load_textual_inversion("broyang/hentaidigitalart_v20", weight_name="HDA_Bondage.pt", token="HDA_Bondage")
    # pipe.load_textual_inversion("broyang/hentaidigitalart_v20", weight_name="HDA_pet_play.pt", token="HDA_pet_play")
    # pipe.load_textual_inversion("broyang/hentaidigitalart_v20", weight_name="HDA_unconventional maid.pt", token="HDA_unconventional_maid")
    # pipe.load_textual_inversion("broyang/hentaidigitalart_v20", weight_name="HDA_NakedHoodie.pt", token="HDA_NakedHoodie")
    # pipe.load_textual_inversion("broyang/hentaidigitalart_v20", weight_name="HDA_NunDress.pt", token="HDA_NunDress")
    # pipe.load_textual_inversion("broyang/hentaidigitalart_v20", weight_name="HDA_Shibari.pt", token="HDA_Shibari")
    # NOTE(review): the pipeline was already moved to CUDA when it was built above;
    # this second .to("cuda") looks redundant.
    pipe.to("cuda")

    print("---------------Loaded controlnet pipeline---------------") 
    torch.cuda.empty_cache()
    gc.collect()
    # NOTE(review): max_memory_allocated presumably reports the peak, not the
    # current, allocation — confirm this is the figure wanted here.
    print(f"CUDA memory allocated: {torch.cuda.max_memory_allocated(device='cuda') / 1e9:.2f} GB")
    # NOTE(review): no compile step is visible in this block; the message may be stale.
    print("Model Compiled!")

def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
    """Return ``seed`` as given, or a fresh random one when ``randomize_seed`` is truthy.

    The random seed is drawn from ``[0, MAX_SEED]`` inclusive.
    """
    return random.randint(0, MAX_SEED) if randomize_seed else seed

def get_additional_prompt():
    """Build a prompt from fixed quality tags plus one random pick per clothing slot.

    Returns:
        A string shaped like
        ``"<quality tags>, <top>, <bottom>, <accessory>, score_9"``.
    """
    quality_tags = "hyperrealistic photography,extremely detailed,(intricate details),unity 8k wallpaper,ultra detailed"
    # One pool per slot, sampled in this order (top, bottom, accessory).
    # "short skirt" is listed twice in the second pool, doubling its odds.
    pools = (
        ["tank top", "blouse", "button up shirt", "sweater", "corset top"],
        ["short skirt", "athletic shorts", "jean shorts", "pleated skirt", "short skirt", "leggings", "high-waisted shorts"],
        ["knee-high boots", "gloves", "Thigh-high stockings", "Garter belt", "choker", "necklace", "headband", "headphones"],
    )
    picks = [random.choice(pool) for pool in pools]
    return ", ".join([quality_tags, *picks, "score_9"])

def get_prompt(prompt, additional_prompt):
    """Compose the final text prompt from the user's description and extra tags.

    Args:
        prompt: User-entered description; "" means nothing was entered.
        additional_prompt: Extra text appended after the main prompt.

    Returns:
        ``"boho chic <additional_prompt>"`` when ``prompt`` is empty, otherwise
        ``"Photo from Pinterest of <prompt> <interior tags> <additional_prompt>"``.
    """
    interior = "design-style interior designed (interior space),tungsten white balance,captured with a DSLR camera using f/10 aperture, 1/60 sec shutter speed, ISO 400, 20mm focal length"

    if prompt == "":
        # Fixed default look when no description is given. An earlier random
        # preset pick was always overwritten by this value, so it is dropped.
        prompt = "boho chic"
    else:
        prompt = f"Photo from Pinterest of {prompt} {interior}"
    # A stray literal "f" used to be glued onto the front of additional_prompt.
    return f"{prompt} {additional_prompt}"

# Design presets offered in the UI, as (display name, prompt fragment) pairs.
# Their order is the order of the radio choices; "None" adds no style text.
_STYLE_PRESETS = (
    ("None", ""),
    ("Minimalistic", "Minimalist interior design,clean lines,neutral colors,uncluttered space,functional furniture,lots of natural light"),
    ("Boho", "Bohemian chic interior,eclectic mix of patterns and textures,vintage furniture,plants,woven textiles,warm earthy colors"),
    ("Farmhouse", "Modern farmhouse interior,rustic wood elements,shiplap walls,neutral color palette,industrial accents,cozy textiles"),
    ("Saudi Prince", "Opulent gold interior,luxurious ornate furniture,crystal chandeliers,rich fabrics,marble floors,intricate Arabic patterns"),
    ("Neoclassical", "Neoclassical interior design,elegant columns,ornate moldings,symmetrical layout,refined furniture,muted color palette"),
    ("Eclectic", "Eclectic interior design,mix of styles and eras,bold color combinations,diverse furniture pieces,unique art objects"),
    ("Parisian", "Parisian apartment interior,all-white color scheme,ornate moldings,herringbone wood floors,elegant furniture,large windows"),
    ("Hollywood", "Hollywood Regency interior,glamorous and luxurious,bold colors,mirrored surfaces,velvet upholstery,gold accents"),
    ("Scandinavian", "Scandinavian interior design,light wood tones,white walls,minimalist furniture,cozy textiles,hygge atmosphere"),
    ("Beach", "Coastal beach house interior,light blue and white color scheme,weathered wood,nautical accents,sheer curtains,ocean view"),
    ("Japanese", "Traditional Japanese interior,tatami mats,shoji screens,low furniture,zen garden view,minimalist decor,natural materials"),
    ("Midcentury Modern", "Mid-century modern interior,1950s-60s style furniture,organic shapes,warm wood tones,bold accent colors,large windows"),
    ("Retro Futurism", "Neon (atompunk world) retro cyberpunk background"),
    ("Texan", "Western cowboy interior,rustic wood beams,leather furniture,cowhide rugs,antler chandeliers,southwestern patterns"),
    ("Matrix", "Futuristic cyberpunk interior,neon accent lighting,holographic plants,sleek black surfaces,advanced gaming setup,transparent screens,Blade Runner inspired decor,high-tech minimalist furniture"),
)

# Same data in the list-of-dicts shape.
style_list = [{"name": label, "prompt": text} for label, text in _STYLE_PRESETS]

# Name -> prompt lookup, plus the ordered list of preset names for the radio.
styles = {entry["name"]: entry["prompt"] for entry in style_list}
STYLE_NAMES = list(styles)

def apply_style(style_name):
    """Return the prompt fragment registered for ``style_name``.

    Unknown names (including ``None``) yield an empty string, the same text
    the "None" preset carries, rather than failing on an unassigned local.
    """
    return styles.get(style_name, "")

    
# Custom stylesheet handed to gr.Blocks: centers headings, hides the footer,
# caps the container width and makes images fill a fixed-height box.
css = """
h1, h2, h3 {
    text-align: center;
    display: block;
}
footer {
    visibility: hidden;
}
.gradio-container {
    max-width: 1100px !important;
}
.gr-image {
    display: flex;
    justify-content: center; 
    align-items: center;
    width: 100%;
    height: 512px;
    overflow: hidden;
}
.gr-image img {
    width: 100%;
    height: 100%; 
    object-fit: cover;
    object-position: center;
}
"""
# Build the UI: advanced sliders (hidden unless show_options), a prompt box,
# the style radio, input/output image panes, and the event wiring between them.
with gr.Blocks(theme="bethecloud/storj_theme", css=css) as demo:
    #############################################################################
    with gr.Row():
        with gr.Accordion("Advanced options", open=show_options, visible=show_options):
            num_images = gr.Slider(
                label="Images", minimum=1, maximum=4, value=1, step=1
            )
            image_resolution = gr.Slider(
                label="Image resolution",
                minimum=256,
                maximum=1024,
                value=512,
                step=256,
            )
            preprocess_resolution = gr.Slider(
                label="Preprocess resolution",
                minimum=128,
                maximum=1024,
                value=512,
                step=1,
            )
            num_steps = gr.Slider(
                label="Number of steps", minimum=1, maximum=100, value=15, step=1
            )  # 20/4.5 or 12 without lora, 4 with lora
            guidance_scale = gr.Slider(
                label="Guidance scale", minimum=0.1, maximum=30.0, value=5.5, step=0.1
            )  # 5 without lora, 2 with lora
            seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
            # NOTE(review): this checkbox is not part of `config` below, so it is
            # never passed to process_image (which always draws its own seed).
            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
            a_prompt = gr.Textbox(
                label="Additional prompt",
                value = "design-style interior designed (interior space), tungsten white balance, captured with a DSLR camera using f/10 aperture, 1/60 sec shutter speed, ISO 400, 20mm focal length"
            )
            n_prompt = gr.Textbox(
                label="Negative prompt",
                value="EasyNegativeV2, fcNeg, (badhandv4:1.4), (worst quality, low quality, bad quality, normal quality:2.0), (bad hands, missing fingers, extra fingers:2.0)",
            )
    #############################################################################
    # input text
    with gr.Column():
        prompt = gr.Textbox(
            label="Custom Design",
            placeholder="Enter a description (optional)",
        )
    # design options
    with gr.Row(visible=True):
        style_selection = gr.Radio(
            show_label=True,
            container=True,
            interactive=True,
            choices=STYLE_NAMES,
            value="None",
            label="Design Styles",
        )
    # input image
    with gr.Row(equal_height=True):
        with gr.Column(scale=1, min_width=300):
            image = gr.Image(
                label="Input",
                sources=["upload"],
                show_label=True,
                mirror_webcam=True,
                type="pil",
            )
            # run button (starts hidden; shown by turn_buttons_on once a result exists)
            with gr.Column():
                run_button = gr.Button(value="Use this one", size="lg", visible=False)
        # output image
        with gr.Column(scale=1, min_width=300):
            result = gr.Image(  
                label="Output",
                interactive=False,
                type="pil",
                show_share_button= False,
            )
            # Use this image button (same label as run_button; feeds the output back in)
            with gr.Column():
                use_ai_button = gr.Button(value="Use this one", size="lg", visible=False)
    # Inputs shared by the processing handlers. The order must match the
    # positional parameters of auto_process_image, submit and process_image.
    config = [
        image,
        style_selection,
        prompt,
        a_prompt,
        n_prompt,
        num_images,
        image_resolution,
        preprocess_resolution,
        num_steps,
        guidance_scale,
        seed,
    ]
    
    with gr.Row():
        helper_text = gr.Markdown("## Tap and hold (on mobile) to save the image.", visible=True)
    
    # image processing
    @gr.on(triggers=[image.upload, prompt.submit, run_button.click], inputs=config, outputs=result, show_progress="minimal")
    def auto_process_image(image, style_selection, prompt, a_prompt, n_prompt, num_images, image_resolution, preprocess_resolution, num_steps, guidance_scale, seed, progress=gr.Progress(track_tqdm=True)):
        """Run process_image on the current input whenever an image is uploaded,
        the prompt is submitted, or the run button is clicked.

        `progress` is not used in the body; process_image is a module-level
        function defined after this block and is looked up at call time.
        """
        return process_image(image, style_selection, prompt, a_prompt, n_prompt, num_images, image_resolution, preprocess_resolution, num_steps, guidance_scale, seed)
    
    # AI image processing
    @gr.on(triggers=[use_ai_button.click], inputs=[result] + config, outputs=[image, result], show_progress="minimal")
    def submit(previous_result, image, style_selection, prompt, a_prompt, n_prompt, num_images, image_resolution, preprocess_resolution, num_steps, guidance_scale, seed, progress=gr.Progress(track_tqdm=True)):
        """Feed the previous output back in as the new input and regenerate.

        Generator handler: the `image` argument is ignored in favour of
        `previous_result`; it yields once to swap the input pane and once more
        with the newly generated result.
        """
        # First, yield the previous result to update the input image immediately
        yield previous_result, gr.update()
        # Then, process the new input image
        new_result = process_image(previous_result, style_selection, prompt, a_prompt, n_prompt, num_images, image_resolution, preprocess_resolution, num_steps, guidance_scale, seed)
        # Finally, yield the new result
        yield previous_result, new_result

    # Turn off buttons when processing
    @gr.on(triggers=[image.upload, use_ai_button.click, run_button.click], inputs=None, outputs=[run_button, use_ai_button], show_progress="hidden")
    def turn_buttons_off():
        """Hide both buttons as soon as a processing trigger fires."""
        return gr.update(visible=False), gr.update(visible=False)
    
    # Turn on buttons when processing is complete
    @gr.on(triggers=[result.change], inputs=None, outputs=[use_ai_button, run_button], show_progress="hidden")
    def turn_buttons_on():
        """Show both buttons again once the output image changes."""
        return gr.update(visible=True), gr.update(visible=True)

@spaces.GPU(duration=12)
@torch.inference_mode()
def process_image(
    image,
    style_selection,
    prompt,
    a_prompt,
    n_prompt,
    num_images,
    image_resolution,
    preprocess_resolution,
    num_steps,
    guidance_scale,
    seed,
):
    """Turn an input photo into a restyled render with the NormalBae ControlNet pipeline.

    Args:
        image: Input picture (the UI supplies a PIL image).
        style_selection: Name of a preset in ``styles``; ``None`` or ``"None"``
            means no preset.
        prompt: Free-text description, possibly "".
        a_prompt: Additional prompt text appended to the main prompt.
        n_prompt: Negative prompt.
        num_images: Images per prompt requested from the pipeline; only the
            first one is returned.
        image_resolution: Output resolution forwarded to the preprocessor.
        preprocess_resolution: Detection resolution forwarded to the preprocessor.
        num_steps: Number of inference steps.
        guidance_scale: Guidance scale passed to the pipeline.
        seed: Accepted for interface compatibility; a fresh random seed is
            always drawn instead.

    Returns:
        The first image produced by the pipeline.

    Raises:
        gr.Error: If no input image was supplied.
    """
    if image is None:
        # e.g. the prompt box was submitted before anything was uploaded
        raise gr.Error("Please upload an image first.")

    preprocess_start = time.time()
    print("processing image")

    # The caller's seed is intentionally overridden with a random one.
    seed = random.randint(0, MAX_SEED)
    # torch.cuda.manual_seed() returns None, so build an explicit CUDA generator
    # to hand to the pipeline instead of passing generator=None.
    generator = torch.Generator(device="cuda").manual_seed(seed)
    preprocessor.load("NormalBae")
    control_image = preprocessor(
        image=image,
        image_resolution=image_resolution,
        detect_resolution=preprocess_resolution,
    )
    preprocess_time = time.time() - preprocess_start

    # Both conditions must hold for a preset to apply; with `or` this test was
    # always true and the fallback branch could never run.
    if style_selection is not None and style_selection != "None":
        prompt = "Photo from Pinterest of " + apply_style(style_selection) + " " + prompt + "," + a_prompt
    else:
        prompt = str(get_prompt(prompt, a_prompt))
    negative_prompt = str(n_prompt)
    print(prompt)
    print(f"\n-------------------------Preprocess done in: {preprocess_time:.2f} seconds-------------------------")

    start = time.time()
    results = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        guidance_scale=guidance_scale,
        num_images_per_prompt=num_images,
        num_inference_steps=num_steps,
        generator=generator,
        image=control_image,
    ).images[0]
    print(f"\n-------------------------Inference done in: {time.time() - start:.2f} seconds-------------------------")
    torch.cuda.empty_cache()

    # upload block: save input and output locally, then queue both uploads to
    # the dataset repo (run_as_future=True, so the calls do not block).
    timestamp = int(time.time())
    img_path = f"{timestamp}.jpg"
    results_path = f"{timestamp}_out.jpg"
    imageio.imsave(img_path, image)
    imageio.imsave(results_path, results)
    for path in (img_path, results_path):
        api.upload_file(
            path_or_fileobj=path,
            path_in_repo=path,
            repo_id="broyang/interior-ai-outputs",
            repo_type="dataset",
            token=API_KEY,
            run_as_future=True,
        )
    return results

# Start the server. In prod mode the queue is capped at 20 and the app binds to
# localhost on `port`; otherwise the default launch is used with the API
# closed and hidden.
if prod:
    demo.queue(max_size=20).launch(server_name="localhost", server_port=port)
else:
    demo.queue(api_open=False).launch(show_api=False)

.aidigestignore

controlnet_aux_local/normalbae/*
requirements.txt
win.requirements.txt
web.html
client.py
local_app.py
README.md
Dockerfile
.gitignore
.gitattributes

controlnet_aux_local/util.py

import os
import random

import cv2
import numpy as np
import torch

# Path of the "ckpts" folder that sits next to this module.
annotator_ckpts_path = os.path.join(os.path.dirname(__file__), 'ckpts')


def HWC3(x):
    """Coerce a uint8 image array to three channels in height-width-channel layout.

    2-D input is treated as single-channel. One channel is repeated three
    times, three channels are returned as-is (the same object), and four
    channels are alpha-composited over a white background. Any other dtype or
    layout trips an assertion.
    """
    assert x.dtype == np.uint8
    if x.ndim == 2:
        x = x[:, :, None]
    assert x.ndim == 3
    channels = x.shape[2]
    assert channels in (1, 3, 4)

    if channels == 1:
        return np.concatenate([x, x, x], axis=2)
    if channels == 3:
        return x

    # Four channels: blend RGB over white using the alpha channel as opacity.
    rgb = x[:, :, 0:3].astype(np.float32)
    opacity = x[:, :, 3:4].astype(np.float32) / 255.0
    blended = rgb * opacity + 255.0 * (1.0 - opacity)
    return blended.clip(0, 255).astype(np.uint8)


def make_noise_disk(H, W, C, F):
    """Generate smooth random noise of size H x W, rescaled so min is 0 and max is 1.

    A coarse grid of uniform noise (about one cell per F pixels, plus padding)
    is upsampled with cubic interpolation, then a margin of F pixels is
    cropped from the top and left so the result is H x W.
    """
    coarse_shape = ((H // F) + 2, (W // F) + 2, C)
    coarse = np.random.uniform(low=0, high=1, size=coarse_shape)
    upsampled = cv2.resize(coarse, (W + 2 * F, H + 2 * F), interpolation=cv2.INTER_CUBIC)
    noise = upsampled[F: F + H, F: F + W]
    noise -= np.min(noise)
    noise /= np.max(noise)
    # Single-channel output gets an explicit trailing channel axis
    # (presumably because cv2.resize drops it — confirm).
    return noise[:, :, None] if C == 1 else noise


def nms(x, t, s):
    """Thin a response map: blur it, keep directional local maxima, then threshold.

    Args:
        x: Response map (converted to float32 before blurring).
        t: Threshold applied to the surviving responses.
        s: Sigma of the Gaussian blur.

    Returns:
        uint8 array that is 255 where a kept response exceeds ``t``, else 0.
    """
    blurred = cv2.GaussianBlur(x.astype(np.float32), (0, 0), s)

    # 3x3 line-shaped kernels: horizontal, vertical and the two diagonals.
    line_kernels = [
        np.array(rows, dtype=np.uint8)
        for rows in (
            [[0, 0, 0], [1, 1, 1], [0, 0, 0]],
            [[0, 1, 0], [0, 1, 0], [0, 1, 0]],
            [[1, 0, 0], [0, 1, 0], [0, 0, 1]],
            [[0, 0, 1], [0, 1, 0], [1, 0, 0]],
        )
    ]

    kept = np.zeros_like(blurred)
    for kernel in line_kernels:
        # A pixel that equals its dilation is the maximum along that line.
        is_peak = cv2.dilate(blurred, kernel=kernel) == blurred
        np.putmask(kept, is_peak, blurred)

    mask = np.zeros_like(kept, dtype=np.uint8)
    mask[kept > t] = 255
    return mask

def min_max_norm(x):
    """Rescale ``x`` in place so its minimum becomes 0, then return it.

    The divisor is the shifted maximum floored at 1e-5, so a constant array
    ends up as all zeros rather than dividing by zero.
    """
    lowest = np.min(x)
    x -= lowest
    span = np.maximum(np.max(x), 1e-5)
    x /= span
    return x


def safe_step(x, step=2):
    """Quantize ``x``: scale by ``step + 1``, truncate to integers, divide by ``step``.

    The result is a float32 array.
    """
    scaled = x.astype(np.float32) * float(step + 1)
    truncated = scaled.astype(np.int32)
    return truncated.astype(np.float32) / float(step)


def img2mask(img, H, W, low=10, high=90):
    """Derive a random boolean mask of size H x W from a uint8 image.

    One channel is picked at random (for 3-D input), resized to (H, W),
    inverted with probability 0.5, and thresholded below a random percentile
    drawn from ``range(low, high)``.
    """
    assert img.ndim in (2, 3)
    assert img.dtype == np.uint8

    # Random draws happen in a fixed order: channel, inversion, percentile.
    plane = img[:, :, random.randrange(0, img.shape[2])] if img.ndim == 3 else img
    plane = cv2.resize(plane, (W, H), interpolation=cv2.INTER_CUBIC)

    invert = random.uniform(0, 1) < 0.5
    if invert:
        plane = 255 - plane

    cutoff = np.percentile(plane, random.randrange(low, high))
    return plane < cutoff


def resize_image(input_image, resolution):
    """Resize so the shorter side is about ``resolution``, snapping both sides to multiples of 64.

    Upscaling uses Lanczos interpolation; downscaling (or no scaling) uses
    area interpolation. The input must have three dimensions.
    """
    src_h, src_w, _ = input_image.shape
    src_h = float(src_h)
    src_w = float(src_w)
    scale = float(resolution) / min(src_h, src_w)

    out_h = int(np.round(src_h * scale / 64.0)) * 64
    out_w = int(np.round(src_w * scale / 64.0)) * 64

    method = cv2.INTER_LANCZOS4 if scale > 1 else cv2.INTER_AREA
    return cv2.resize(input_image, (out_w, out_h), interpolation=method)


def torch_gc():
    """Empty the CUDA cache and run IPC collection when CUDA is available; otherwise do nothing."""
    if not torch.cuda.is_available():
        return
    torch.cuda.empty_cache()
    torch.cuda.ipc_collect()


def ade_palette():
    """ADE20K palette that maps each class to RGB values.

    Returns:
        A freshly built list of 150 ``[R, G, B]`` triples; the list index is
        presumably the class id (confirm against the segmentation model used).
    """
    return [[120, 120, 120], [180, 120, 120], [6, 230, 230], [80, 50, 50],
            [4, 200, 3], [120, 120, 80], [140, 140, 140], [204, 5, 255],
            [230, 230, 230], [4, 250, 7], [224, 5, 255], [235, 255, 7],
            [150, 5, 61], [120, 120, 70], [8, 255, 51], [255, 6, 82],
            [143, 255, 140], [204, 255, 4], [255, 51, 7], [204, 70, 3],
            [0, 102, 200], [61, 230, 250], [255, 6, 51], [11, 102, 255],
            [255, 7, 71], [255, 9, 224], [9, 7, 230], [220, 220, 220],
            [255, 9, 92], [112, 9, 255], [8, 255, 214], [7, 255, 224],
            [255, 184, 6], [10, 255, 71], [255, 41, 10], [7, 255, 255],
            [224, 255, 8], [102, 8, 255], [255, 61, 6], [255, 194, 7],
            [255, 122, 8], [0, 255, 20], [255, 8, 41], [255, 5, 153],
            [6, 51, 255], [235, 12, 255], [160, 150, 20], [0, 163, 255],
            [140, 140, 140], [250, 10, 15], [20, 255, 0], [31, 255, 0],
            [255, 31, 0], [255, 224, 0], [153, 255, 0], [0, 0, 255],
            [255, 71, 0], [0, 235, 255], [0, 173, 255], [31, 0, 255],
            [11, 200, 200], [255, 82, 0], [0, 255, 245], [0, 61, 255],
            [0, 255, 112], [0, 255, 133], [255, 0, 0], [255, 163, 0],
            [255, 102, 0], [194, 255, 0], [0, 143, 255], [51, 255, 0],
            [0, 82, 255], [0, 255, 41], [0, 255, 173], [10, 0, 255],
            [173, 255, 0], [0, 255, 153], [255, 92, 0], [255, 0, 255],
            [255, 0, 245], [255, 0, 102], [255, 173, 0], [255, 0, 20],
            [255, 184, 184], [0, 31, 255], [0, 255, 61], [0, 71, 255],
            [255, 0, 204], [0, 255, 194], [0, 255, 82], [0, 10, 255],
            [0, 112, 255], [51, 0, 255], [0, 194, 255], [0, 122, 255],
            [0, 255, 163], [255, 153, 0], [0, 255, 10], [255, 112, 0],
            [143, 255, 0], [82, 0, 255], [163, 255, 0], [255, 235, 0],
            [8, 184, 170], [133, 0, 255], [0, 255, 92], [184, 0, 255],
            [255, 0, 31], [0, 184, 255], [0, 214, 255], [255, 0, 112],
            [92, 255, 0], [0, 224, 255], [112, 224, 255], [70, 184, 160],
            [163, 0, 255], [153, 0, 255], [71, 255, 0], [255, 0, 163],
            [255, 204, 0], [255, 0, 143], [0, 255, 235], [133, 255, 0],
            [255, 0, 235], [245, 0, 255], [255, 0, 122], [255, 245, 0],
            [10, 190, 212], [214, 255, 0], [0, 204, 255], [20, 0, 255],
            [255, 255, 0], [0, 153, 255], [0, 41, 255], [0, 255, 204],
            [41, 0, 255], [41, 255, 0], [173, 0, 255], [0, 245, 255],
            [71, 0, 255], [122, 0, 255], [0, 255, 184], [0, 92, 255],
            [184, 255, 0], [0, 133, 255], [255, 214, 0], [25, 194, 194],
            [102, 255, 0], [92, 0, 255]]

controlnet_aux_local/processor.py

"""
This file contains a Processor that can be used to process images with controlnet aux processors
"""
import io
import logging
from typing import Dict, Optional, Union

from PIL import Image

from controlnet_aux_local import (CannyDetector, ContentShuffleDetector, HEDdetector,
                            LeresDetector, LineartAnimeDetector,
                            LineartDetector, MediapipeFaceDetector,
                            MidasDetector, MLSDdetector, NormalBaeDetector,
                            OpenposeDetector, PidiNetDetector, ZoeDetector,
                            DWposeDetector)

# Module-level logger, named after this module.
LOGGER = logging.getLogger(__name__)


# Registry of processor ids. 'class' is the detector class to use; 'checkpoint'
# says whether it is built with from_pretrained (True) or instantiated
# directly with no arguments (False) — see Processor.load_processor.
MODELS = {
    # checkpoint models
    'scribble_hed': {'class': HEDdetector, 'checkpoint': True},
    'softedge_hed': {'class': HEDdetector, 'checkpoint': True},
    'scribble_hedsafe': {'class': HEDdetector, 'checkpoint': True},
    'softedge_hedsafe': {'class': HEDdetector, 'checkpoint': True},
    'depth_midas': {'class': MidasDetector, 'checkpoint': True},
    'mlsd': {'class': MLSDdetector, 'checkpoint': True},
    'openpose': {'class': OpenposeDetector, 'checkpoint': True},
    'openpose_face': {'class': OpenposeDetector, 'checkpoint': True},
    'openpose_faceonly': {'class': OpenposeDetector, 'checkpoint': True},
    'openpose_full': {'class': OpenposeDetector, 'checkpoint': True},
    'openpose_hand': {'class': OpenposeDetector, 'checkpoint': True},
    'dwpose': {'class': DWposeDetector, 'checkpoint': True},
    'scribble_pidinet': {'class': PidiNetDetector, 'checkpoint': True},
    'softedge_pidinet': {'class': PidiNetDetector, 'checkpoint': True},
    'scribble_pidsafe': {'class': PidiNetDetector, 'checkpoint': True},
    'softedge_pidsafe': {'class': PidiNetDetector, 'checkpoint': True},
    'normal_bae': {'class': NormalBaeDetector, 'checkpoint': True},
    'lineart_coarse': {'class': LineartDetector, 'checkpoint': True},
    'lineart_realistic': {'class': LineartDetector, 'checkpoint': True},
    'lineart_anime': {'class': LineartAnimeDetector, 'checkpoint': True},
    'depth_zoe': {'class': ZoeDetector, 'checkpoint': True}, 
    'depth_leres': {'class': LeresDetector, 'checkpoint': True}, 
    'depth_leres++': {'class': LeresDetector, 'checkpoint': True}, 
    # instantiate
    'shuffle': {'class': ContentShuffleDetector, 'checkpoint': False},
    'mediapipe_face': {'class': MediapipeFaceDetector, 'checkpoint': False},
    'canny': {'class': CannyDetector, 'checkpoint': False},
}


# Default keyword arguments passed to each processor when it is called.
# Keys mirror those of MODELS.
MODEL_PARAMS = {
    'scribble_hed': {'scribble': True},
    'softedge_hed': {'scribble': False},
    'scribble_hedsafe': {'scribble': True, 'safe': True},
    'softedge_hedsafe': {'scribble': False, 'safe': True},
    'depth_midas': {},
    'mlsd': {},
    'openpose': {'include_body': True, 'include_hand': False, 'include_face': False},
    'openpose_face': {'include_body': True, 'include_hand': False, 'include_face': True},
    'openpose_faceonly': {'include_body': False, 'include_hand': False, 'include_face': True},
    'openpose_full': {'include_body': True, 'include_hand': True, 'include_face': True},
    'openpose_hand': {'include_body': False, 'include_hand': True, 'include_face': False},
    'dwpose': {},
    'scribble_pidinet': {'safe': False, 'scribble': True},
    'softedge_pidinet': {'safe': False, 'scribble': False},
    'scribble_pidsafe': {'safe': True, 'scribble': True},
    'softedge_pidsafe': {'safe': True, 'scribble': False},
    'normal_bae': {},
    'lineart_realistic': {'coarse': False},
    'lineart_coarse': {'coarse': True},
    'lineart_anime': {},
    'canny': {},
    'shuffle': {},
    'depth_zoe': {},
    'depth_leres': {'boost': False},
    'depth_leres++': {'boost': True},
    'mediapipe_face': {},
}

# Human-readable list of valid processor ids.
CHOICES = f"Choices for the processor are {list(MODELS.keys())}"


class Processor:
    """Uniform wrapper that builds a controlnet aux detector by id and applies it to images."""

    def __init__(self, processor_id: str, params: Optional[Dict] = None) -> None:
        """Processor that can be used to process images with controlnet aux processors

        Args:
            processor_id (str): processor name; must be a key of ``MODELS``
                                (for example 'normal_bae', 'canny', 'depth_midas')
            params (Optional[Dict]): parameters for the processor, merged over
                                     the defaults in ``MODEL_PARAMS``

        Raises:
            ValueError: if ``processor_id`` is not a key of ``MODELS``
        """
        LOGGER.info("Loading %s", processor_id)

        if processor_id not in MODELS:
            raise ValueError(f"{processor_id} is not a valid processor id. Please make sure to choose one of {', '.join(MODELS.keys())}")

        self.processor_id = processor_id
        self.processor = self.load_processor(self.processor_id)

        # Copy the defaults so per-instance overrides never leak into the
        # shared MODEL_PARAMS table (and from there into other instances).
        self.params = dict(MODEL_PARAMS[self.processor_id])
        # update with user params
        if params:
            self.params.update(params)

    def load_processor(self, processor_id: str) -> 'Processor':
        """Load controlnet aux processors

        Args:
            processor_id (str): processor name

        Returns:
            The detector instance registered for ``processor_id`` in ``MODELS``
        """
        entry = MODELS[processor_id]
        detector_cls = entry['class']

        # Checkpoint-backed detectors are loaded from the annotator repo;
        # the others are constructed directly.
        if entry['checkpoint']:
            return detector_cls.from_pretrained("lllyasviel/Annotators")
        return detector_cls()

    def __call__(self, image: Union[Image.Image, bytes],
                 to_pil: bool = True) -> Union[Image.Image, bytes]:
        """processes an image with a controlnet aux processor

        Args:
            image (Union[Image.Image, bytes]): input image in bytes or PIL Image
            to_pil (bool): True returns the processed image object as-is,
                           False returns it encoded as JPEG bytes

        Returns:
            Union[Image.Image, bytes]: processed image in bytes or PIL Image
        """
        # check if bytes or PIL Image
        if isinstance(image, bytes):
            image = Image.open(io.BytesIO(image)).convert("RGB")

        processed_image = self.processor(image, **self.params)

        if to_pil:
            return processed_image

        output_bytes = io.BytesIO()
        processed_image.save(output_bytes, format='JPEG')
        return output_bytes.getvalue()

controlnet_aux_local/__init__.py

# Package version string.
__version__ = "0.0.8"

# Only NormalBaeDetector is exported; every other detector import is disabled.
# NOTE(review): processor.py imports many of the disabled names (CannyDetector,
# HEDdetector, ...) from this package, which would fail at import time as
# things stand — confirm whether processor.py is still meant to be usable.
# from .hed import HEDdetector
# from .leres import LeresDetector
# from .lineart import LineartDetector
# from .lineart_anime import LineartAnimeDetector
# from .midas import MidasDetector
# from .mlsd import MLSDdetector
from .normalbae import NormalBaeDetector
# from .open_pose import OpenposeDetector
# from .pidi import PidiNetDetector
# from .zoe import ZoeDetector

# from .canny import CannyDetector
# from .mediapipe_face import MediapipeFaceDetector
# from .segment_anything import SamDetector
# from .shuffle import ContentShuffleDetector
# from .dwpose import DWposeDetector