yonishafir committed on
Commit
e189e0b
·
verified ·
1 Parent(s): 58a42f9

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +30 -8
README.md CHANGED
@@ -85,6 +85,21 @@ import requests
85
  import PIL
86
  from io import BytesIO
87
  from torchvision import transforms
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
 
90
  def download_image(url):
@@ -106,6 +121,7 @@ def get_masked_image(image, image_mask, width, height):
106
  masked_image_to_present = Image.fromarray((masked_image_to_present * 255.0).astype(np.uint8))
107
  return image, image_mask_pil, masked_image_to_present
108
 
 
109
  image_transforms = transforms.Compose(
110
  [
111
  transforms.ToTensor(),
@@ -120,7 +136,11 @@ mask_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data
120
  init_image = download_image(img_url).resize((1024, 1024))
121
  mask_image = download_image(mask_url).resize((1024, 1024))
122
 
123
- mask_image = mask_image.convert("L")
 
 
 
 
124
 
125
  width, height = init_image.size
126
 
@@ -132,18 +152,21 @@ pipe = StableDiffusionXLControlNetPipeline.from_pretrained("briaai/BRIA-2.3", co
132
  pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
133
  pipe.load_lora_weights("briaai/BRIA-2.3-FAST-LORA")
134
  pipe.fuse_lora()
 
135
 
136
- pipe = pipe.to('cuda:0')
137
- pipe.enable_xformers_memory_efficient_attention()
138
 
139
- generator = torch.Generator(device='cuda:0').manual_seed(123456)
140
 
141
  vae = pipe.vae
142
 
 
143
  masked_image, image_mask, masked_image_to_present = get_masked_image(init_image, mask_image, width, height)
 
144
  masked_image_tensor = image_transforms(masked_image)
145
  masked_image_tensor = (masked_image_tensor - 0.5) / 0.5
146
 
 
147
  masked_image_tensor = masked_image_tensor.unsqueeze(0).to(device="cuda")
148
  control_latents = vae.encode(
149
  masked_image_tensor[:, :3, :, :].to(vae.dtype)
@@ -160,13 +183,13 @@ mask_tensor = mask_tensor / 255.0
160
 
161
  mask_tensor = mask_tensor.to(device="cuda")
162
  mask_resized = torch.nn.functional.interpolate(mask_tensor[None, ...], size=(control_latents.shape[2], control_latents.shape[3]), mode='nearest')
163
- # mask_resized = mask_resized.to(torch.float16)
164
  masked_image = torch.cat([control_latents, mask_resized], dim=1)
165
 
166
- prompt = "A park bench"
167
 
168
  gen_img = pipe(negative_prompt=default_negative_prompt, prompt=prompt,
169
- controlnet_conditioning_sale=1.0,
170
  num_inference_steps=12,
171
  height=height, width=width,
172
  image = masked_image, # control image
@@ -175,6 +198,5 @@ gen_img = pipe(negative_prompt=default_negative_prompt, prompt=prompt,
175
  guidance_scale = 1.2,
176
  generator=generator).images[0]
177
 
178
-
179
  ```
180
 
 
85
  import PIL
86
  from io import BytesIO
87
  from torchvision import transforms
88
+ import pandas as pd
89
+ import os
90
+
91
+
92
+ def resize_image_to_retain_ratio(image):
93
+ pixel_number = 1024*1024
94
+ granularity_val = 8
95
+ ratio = image.size[0] / image.size[1]
96
+ width = int((pixel_number * ratio) ** 0.5)
97
+ width = width - (width % granularity_val)
98
+ height = int(pixel_number / width)
99
+ height = height - (height % granularity_val)
100
+
101
+ image = image.resize((width, height))
102
+ return image
103
 
104
 
105
  def download_image(url):
 
121
  masked_image_to_present = Image.fromarray((masked_image_to_present * 255.0).astype(np.uint8))
122
  return image, image_mask_pil, masked_image_to_present
123
 
124
+
125
  image_transforms = transforms.Compose(
126
  [
127
  transforms.ToTensor(),
 
136
  init_image = download_image(img_url).resize((1024, 1024))
137
  mask_image = download_image(mask_url).resize((1024, 1024))
138
 
139
+
140
+ init_image = resize_image_to_retain_ratio(init_image)
141
+ width, height = init_image.size
142
+
143
+ mask_image = mask_image.convert("L").resize(init_image.size)
144
 
145
  width, height = init_image.size
146
 
 
152
  pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
153
  pipe.load_lora_weights("briaai/BRIA-2.3-FAST-LORA")
154
  pipe.fuse_lora()
155
+ pipe = pipe.to(device="cuda")
156
 
157
+ # pipe.enable_xformers_memory_efficient_attention()
 
158
 
159
+ generator = torch.Generator(device="cuda").manual_seed(123456)
160
 
161
  vae = pipe.vae
162
 
163
+
164
  masked_image, image_mask, masked_image_to_present = get_masked_image(init_image, mask_image, width, height)
165
+
166
  masked_image_tensor = image_transforms(masked_image)
167
  masked_image_tensor = (masked_image_tensor - 0.5) / 0.5
168
 
169
+
170
  masked_image_tensor = masked_image_tensor.unsqueeze(0).to(device="cuda")
171
  control_latents = vae.encode(
172
  masked_image_tensor[:, :3, :, :].to(vae.dtype)
 
183
 
184
  mask_tensor = mask_tensor.to(device="cuda")
185
  mask_resized = torch.nn.functional.interpolate(mask_tensor[None, ...], size=(control_latents.shape[2], control_latents.shape[3]), mode='nearest')
186
+
187
  masked_image = torch.cat([control_latents, mask_resized], dim=1)
188
 
189
+ prompt = ""
190
 
191
  gen_img = pipe(negative_prompt=default_negative_prompt, prompt=prompt,
192
+ controlnet_conditioning_scale=1.0,
193
  num_inference_steps=12,
194
  height=height, width=width,
195
  image = masked_image, # control image
 
198
  guidance_scale = 1.2,
199
  generator=generator).images[0]
200
 
 
201
  ```
202