BRIA-3.2-ControlNet-Background-Generation, Model Card
BRIA 3.2 ControlNet-Background Generation, trained on the foundation of BRIA 3.2 Text-to-Image, enables the generation of high-quality images guided by a textual prompt and the extracted background mask estimation from an input image. This allows for the creation of different background variations of an image, all sharing the same foreground.
Model Description
Developed by: BRIA AI
Model type: ControlNet for Latent diffusion
License: bria-3.2
Model Description: ControlNet Background-Generation for BRIA 3.2 Text-to-Image model. The model generates images guided by text and the background mask.
Resources for more information: BRIA AI
Usage
Installation
pip install -qr https://huggingface.co/briaai/bria-3.2-controlnet-bg-gen/resolve/main/requirements.txt
Install the latest version of diffusers:
pip install git+https://github.com/huggingface/diffusers
# Download the custom model/pipeline modules from the Hub next to this script
# so they can be imported by the inference example below.
from huggingface_hub import hf_hub_download
import os

# __file__ is undefined in interactive sessions (REPL / notebooks); fall back
# to the current working directory in that case. Catch only NameError — a
# bare `except:` would also hide unrelated failures such as KeyboardInterrupt.
try:
    local_dir = os.path.dirname(__file__)
except NameError:
    local_dir = '.'

for filename in ('controlnet_bria.py',
                 'pipeline_bria_controlnet_inpainting.py',
                 'utilities.py'):
    hf_hub_download(repo_id="briaai/BRIA-3.2-ControlNet-BG-Gen",
                    filename=filename, local_dir=local_dir)
Run the inpainting script
from io import BytesIO
import requests
import torch
from PIL import Image
from PIL.Image import Image as ImageType
from utilities import remove_bg_from_image
from controlnet_bria import BriaControlNetModel
from pipeline_bria_controlnet_inpainting import BriaControlNetInpaintingPipeline
# Output width/height are rounded down to multiples of this value so the
# image dimensions stay compatible with the diffusion model's latent grid
# (presumably the VAE downsampling factor — TODO confirm against the pipeline).
GRANULARITY_VAL = 16
def load_gpu_pipeline_from_models():
    """Build the BRIA-3.2 inpainting pipeline with the background-generation
    ControlNet attached, in bfloat16 on CUDA.

    Returns:
        BriaControlNetInpaintingPipeline: a ready-to-call pipeline.
    """
    print("Getting ControlNet")
    controlnet = BriaControlNetModel.from_pretrained(
        "briaai/BRIA-3.2-ControlNet-BG-Gen",
        subfolder="controlnet_replace_bg_3_2_weights",
    )
    print("Done getting controlnet")

    print("Getting transformer")
    transformer_path = "briaai/BRIA-3.2"
    pipeline = BriaControlNetInpaintingPipeline.from_pretrained(
        transformer_path,
        revision="pre_diffusers_support",
        controlnet=controlnet,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
    )
    print("Done getting transformer")

    pipeline = pipeline.to(device="cuda", dtype=torch.bfloat16)
    # NOTE(review): enable_model_cpu_offload() manages device placement itself,
    # so combining it with an explicit .to("cuda") is usually redundant or
    # conflicting — confirm against the diffusers version pinned in
    # requirements.txt before removing either call.
    pipeline.enable_model_cpu_offload()
    return pipeline
def download_image(url):
    """Fetch *url* over HTTP and decode the response body as a PIL image.

    Args:
        url: direct link to an image file.

    Returns:
        PIL.Image.Image: the decoded image.

    Raises:
        requests.HTTPError: on a non-2xx status code.
        requests.Timeout: if the server does not respond within 30 seconds.
    """
    response = requests.get(url, timeout=30)
    # Fail loudly on HTTP errors instead of handing PIL an HTML error page.
    response.raise_for_status()
    return Image.open(BytesIO(response.content))
def resize_image_to_retain_ratio(image, granularity=None):
    """Resize *image* to roughly one megapixel while approximately keeping
    its aspect ratio, rounding both sides down to a multiple of *granularity*.

    Args:
        image: a PIL image (anything exposing ``.size`` and ``.resize``).
        granularity: the multiple both output dimensions must respect;
            defaults to the module-level ``GRANULARITY_VAL`` (16).

    Returns:
        The resized image, with width and height divisible by *granularity*.
    """
    step = GRANULARITY_VAL if granularity is None else granularity
    target_pixels = 1024 * 1024
    aspect_ratio = image.size[0] / image.size[1]
    # Solve width * height ~= target_pixels with width / height ~= aspect_ratio,
    # then snap each side down to the required multiple.
    width = int((target_pixels * aspect_ratio) ** 0.5)
    width = width - (width % step)
    height = int(target_pixels / width)
    height = height - (height % step)
    return image.resize((width, height))
def preprocess_input_image(control_image: Image.Image, mask: Image.Image):
    """Composite the foreground (selected by *mask*) onto a black canvas of
    the same size, then resize the result to ~1 MP with dimensions that are
    multiples of GRANULARITY_VAL.
    """
    black_canvas = Image.new("RGB", control_image.size, (0, 0, 0))
    black_canvas.paste(control_image, (0, 0), mask)
    return resize_image_to_retain_ratio(black_canvas)
def infer(
    pipeline: BriaControlNetInpaintingPipeline,
    input_image: ImageType,
    mask_image: ImageType,
    prompt: str,
    negative_prompt: str,
    num_inference_steps: int,
    seed: int,
    guidance_scale: float,
    controlnet_conditioning_scale: float,
    strength: float,
):
    """Run one background-generation pass and return the generated PIL image.

    The output resolution matches *input_image*; *seed* makes the run
    reproducible via a CUDA generator.
    """
    rng = torch.Generator(device="cuda").manual_seed(seed)
    result = pipeline(
        prompt=prompt,
        negative_prompt=negative_prompt,
        control_image=[input_image],
        mask=mask_image,
        control_mode=None,
        width=input_image.width,
        height=input_image.height,
        num_inference_steps=num_inference_steps,
        guidance_scale=guidance_scale,
        controlnet_conditioning_scale=controlnet_conditioning_scale,
        strength=strength,
        max_sequence_length=128,
        generator=rng,
        invert_mask_before_latents_override_fg=True,  # crucial for replace-bg usage
    )
    return result.images[0]
# --- Example usage ------------------------------------------------------------
gpu_pipeline = load_gpu_pipeline_from_models()

seed = 9871256
# call parameters we set for optimal results
num_inference_steps = 50
guidance_scale = 4.0
controlnet_conditioning_scale = 1.0
strength = 1.0

example_img_path = "spray_bottle.png"
# NOTE(review): "und" in the prompt below looks like a typo for "and" — left
# untouched because the exact string is model input; confirm before changing.
prompt = "a spray bottle standing on a small und reflective dish placed on a dark wooden table outdoors at night. to the left, a few lavender sprigs lie on the table, and to the right, a folded navy cloth napkin is neatly arranged. in the background, green shrubs and a wooden fence are clearly visible. sharp and detailed background. shot from a top-down angle under natural moonlight."
negative_prompt = "Logo,Watermark,Text,Ugly,Morbid,Extra fingers,Poorly drawn hands,Mutation,Blurry,Extra limbs,Gross proportions,Missing arms,Mutated hands,Long neck,Duplicate,Mutilated,Mutilated hands,Poorly drawn face,Deformed,Bad anatomy,Cloned face,Malformed limbs,Missing legs,Too many fingers"

# Estimate the foreground mask, then bring image and mask to matching,
# granularity-aligned dimensions expected by the pipeline.
input_img = Image.open(example_img_path)
input_mask = remove_bg_from_image(input_img)
resized_img = preprocess_input_image(input_img, input_mask)
resized_mask = input_mask.resize(resized_img.size, resample=Image.Resampling.BILINEAR)

gen_img = infer(pipeline=gpu_pipeline,
                input_image=resized_img,
                mask_image=resized_mask,
                prompt=prompt,
                negative_prompt=negative_prompt,
                num_inference_steps=num_inference_steps,
                seed=seed,
                guidance_scale=guidance_scale,
                controlnet_conditioning_scale=controlnet_conditioning_scale,
                strength=strength)
# paste foreground on generated image
# (PIL treats an Image passed as the second positional argument as the mask)
gen_img.paste(resized_img, resized_mask)
- Downloads last month
- -
