Update pipeline_stable_diffusion_3_ipa.py
Browse files
pipeline_stable_diffusion_3_ipa.py
CHANGED
|
@@ -1148,7 +1148,7 @@ class StableDiffusion3Pipeline(DiffusionPipeline, SD3LoraLoaderMixin, FromSingle
|
|
| 1148 |
print('Using primary image.')
|
| 1149 |
clip_image = clip_image.resize((max(clip_image.size), max(clip_image.size)))
|
| 1150 |
#clip_image_embeds_1 = self.encode_clip_image_emb(clip_image, device, dtype)
|
| 1151 |
-
with torch.
|
| 1152 |
clip_image_embeds_1 = self.clip_image_processor(images=clip_image, return_tensors="pt").pixel_values
|
| 1153 |
print('clip output shape: ', clip_image_embeds_1.shape)
|
| 1154 |
clip_image_embeds_1 = clip_image_embeds_1.to(device, dtype=dtype)
|
|
@@ -1159,7 +1159,7 @@ class StableDiffusion3Pipeline(DiffusionPipeline, SD3LoraLoaderMixin, FromSingle
|
|
| 1159 |
if clip_image_2 != None:
|
| 1160 |
print('Using secondary image.')
|
| 1161 |
clip_image_2 = clip_image_2.resize((max(clip_image_2.size), max(clip_image_2.size)))
|
| 1162 |
-
with torch.
|
| 1163 |
clip_image_embeds_2 = self.clip_image_processor(images=clip_image_2, return_tensors="pt").pixel_values
|
| 1164 |
clip_image_embeds_2 = clip_image_embeds_2.to(device, dtype=dtype)
|
| 1165 |
clip_image_embeds_2 = self.image_encoder(clip_image_embeds_2, output_hidden_states=True).hidden_states[-2]
|
|
@@ -1168,7 +1168,7 @@ class StableDiffusion3Pipeline(DiffusionPipeline, SD3LoraLoaderMixin, FromSingle
|
|
| 1168 |
if clip_image_3 != None:
|
| 1169 |
print('Using tertiary image.')
|
| 1170 |
clip_image_3 = clip_image_3.resize((max(clip_image_3.size), max(clip_image_3.size)))
|
| 1171 |
-
with torch.
|
| 1172 |
clip_image_embeds_3 = self.clip_image_processor(images=clip_image_3, return_tensors="pt").pixel_values
|
| 1173 |
clip_image_embeds_3 = clip_image_embeds_3.to(device, dtype=dtype)
|
| 1174 |
clip_image_embeds_3 = self.image_encoder(clip_image_embeds_3, output_hidden_states=True).hidden_states[-2]
|
|
@@ -1177,7 +1177,7 @@ class StableDiffusion3Pipeline(DiffusionPipeline, SD3LoraLoaderMixin, FromSingle
|
|
| 1177 |
if clip_image_4 != None:
|
| 1178 |
print('Using quaternary image.')
|
| 1179 |
clip_image_4 = clip_image_4.resize((max(clip_image_4.size), max(clip_image_4.size)))
|
| 1180 |
-
with torch.
|
| 1181 |
clip_image_embeds_4 = self.clip_image_processor(images=clip_image_4, return_tensors="pt").pixel_values
|
| 1182 |
clip_image_embeds_4 = clip_image_embeds_4.to(device, dtype=dtype)
|
| 1183 |
clip_image_embeds_4 = self.image_encoder(clip_image_embeds_4, output_hidden_states=True).hidden_states[-2]
|
|
@@ -1186,7 +1186,7 @@ class StableDiffusion3Pipeline(DiffusionPipeline, SD3LoraLoaderMixin, FromSingle
|
|
| 1186 |
if clip_image_5 != None:
|
| 1187 |
print('Using quinary image.')
|
| 1188 |
clip_image_5 = clip_image_5.resize((max(clip_image_5.size), max(clip_image_5.size)))
|
| 1189 |
-
with torch.
|
| 1190 |
clip_image_embeds_5 = self.clip_image_processor(images=clip_image_5, return_tensors="pt").pixel_values
|
| 1191 |
clip_image_embeds_5 = clip_image_embeds_5.to(device, dtype=dtype)
|
| 1192 |
clip_image_embeds_5 = self.image_encoder(clip_image_embeds_5, output_hidden_states=True).hidden_states[-2]
|
|
|
|
| 1148 |
print('Using primary image.')
|
| 1149 |
clip_image = clip_image.resize((max(clip_image.size), max(clip_image.size)))
|
| 1150 |
#clip_image_embeds_1 = self.encode_clip_image_emb(clip_image, device, dtype)
|
| 1151 |
+
with torch.no_grad():
|
| 1152 |
clip_image_embeds_1 = self.clip_image_processor(images=clip_image, return_tensors="pt").pixel_values
|
| 1153 |
print('clip output shape: ', clip_image_embeds_1.shape)
|
| 1154 |
clip_image_embeds_1 = clip_image_embeds_1.to(device, dtype=dtype)
|
|
|
|
| 1159 |
if clip_image_2 != None:
|
| 1160 |
print('Using secondary image.')
|
| 1161 |
clip_image_2 = clip_image_2.resize((max(clip_image_2.size), max(clip_image_2.size)))
|
| 1162 |
+
with torch.no_grad():
|
| 1163 |
clip_image_embeds_2 = self.clip_image_processor(images=clip_image_2, return_tensors="pt").pixel_values
|
| 1164 |
clip_image_embeds_2 = clip_image_embeds_2.to(device, dtype=dtype)
|
| 1165 |
clip_image_embeds_2 = self.image_encoder(clip_image_embeds_2, output_hidden_states=True).hidden_states[-2]
|
|
|
|
| 1168 |
if clip_image_3 != None:
|
| 1169 |
print('Using tertiary image.')
|
| 1170 |
clip_image_3 = clip_image_3.resize((max(clip_image_3.size), max(clip_image_3.size)))
|
| 1171 |
+
with torch.no_grad():
|
| 1172 |
clip_image_embeds_3 = self.clip_image_processor(images=clip_image_3, return_tensors="pt").pixel_values
|
| 1173 |
clip_image_embeds_3 = clip_image_embeds_3.to(device, dtype=dtype)
|
| 1174 |
clip_image_embeds_3 = self.image_encoder(clip_image_embeds_3, output_hidden_states=True).hidden_states[-2]
|
|
|
|
| 1177 |
if clip_image_4 != None:
|
| 1178 |
print('Using quaternary image.')
|
| 1179 |
clip_image_4 = clip_image_4.resize((max(clip_image_4.size), max(clip_image_4.size)))
|
| 1180 |
+
with torch.no_grad():
|
| 1181 |
clip_image_embeds_4 = self.clip_image_processor(images=clip_image_4, return_tensors="pt").pixel_values
|
| 1182 |
clip_image_embeds_4 = clip_image_embeds_4.to(device, dtype=dtype)
|
| 1183 |
clip_image_embeds_4 = self.image_encoder(clip_image_embeds_4, output_hidden_states=True).hidden_states[-2]
|
|
|
|
| 1186 |
if clip_image_5 != None:
|
| 1187 |
print('Using quinary image.')
|
| 1188 |
clip_image_5 = clip_image_5.resize((max(clip_image_5.size), max(clip_image_5.size)))
|
| 1189 |
+
with torch.no_grad():
|
| 1190 |
clip_image_embeds_5 = self.clip_image_processor(images=clip_image_5, return_tensors="pt").pixel_values
|
| 1191 |
clip_image_embeds_5 = clip_image_embeds_5.to(device, dtype=dtype)
|
| 1192 |
clip_image_embeds_5 = self.image_encoder(clip_image_embeds_5, output_hidden_states=True).hidden_states[-2]
|