Update app.py
Browse files
app.py
CHANGED
|
@@ -53,8 +53,8 @@ torch_dtype = torch.bfloat16
|
|
| 53 |
|
| 54 |
checkpoint = "microsoft/Phi-3.5-mini-instruct"
|
| 55 |
#vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)
|
| 56 |
-
|
| 57 |
-
vae = AutoencoderKL.from_pretrained("ford442/sdxl-vae-bf16")
|
| 58 |
|
| 59 |
pipe = StableDiffusion3Pipeline.from_pretrained("ford442/stable-diffusion-3.5-medium-bf16", torch_dtype=torch.bfloat16).to(torch.device("cuda:0"))
|
| 60 |
#pipe = StableDiffusion3Pipeline.from_pretrained("ford442/stable-diffusion-3.5-medium-bf16").to(torch.device("cuda:0"))
|
|
@@ -131,15 +131,20 @@ def infer(
|
|
| 131 |
user_prompt_rewrite = (
|
| 132 |
"Rewrite this prompt to be more descriptive and detailed and only return the rewritten text: "
|
| 133 |
)
|
|
|
|
|
|
|
|
|
|
| 134 |
input_text = f"{system_prompt_rewrite} {user_prompt_rewrite} {prompt}"
|
|
|
|
| 135 |
print("-- got prompt --")
|
| 136 |
# Encode the input text and include the attention mask
|
| 137 |
-
encoded_inputs = tokenizer(
|
| 138 |
-
|
| 139 |
-
)
|
| 140 |
# Ensure all values are on the correct device
|
| 141 |
input_ids = encoded_inputs["input_ids"].to(device)
|
|
|
|
| 142 |
attention_mask = encoded_inputs["attention_mask"].to(device)
|
|
|
|
| 143 |
print("-- tokenize prompt --")
|
| 144 |
# Google T5
|
| 145 |
#input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to("cuda")
|
|
@@ -151,12 +156,24 @@ def infer(
|
|
| 151 |
top_p=0.9,
|
| 152 |
do_sample=True,
|
| 153 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
# Decode the generated token IDs back into text, dropping special tokens
|
| 155 |
enhanced_prompt = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
|
|
|
| 156 |
print('-- generated prompt --')
|
| 157 |
enhanced_prompt = filter_text(enhanced_prompt,prompt)
|
|
|
|
| 158 |
print('-- filtered prompt --')
|
| 159 |
print(enhanced_prompt)
|
|
|
|
|
|
|
| 160 |
if latent_file: # Check if a latent file is provided
|
| 161 |
# initial_latents = pipe.prepare_latents(
|
| 162 |
# batch_size=1,
|
|
@@ -188,7 +205,7 @@ def infer(
|
|
| 188 |
with torch.no_grad():
|
| 189 |
sd_image = pipe(
|
| 190 |
prompt=enhanced_prompt, # This conversion is fine
|
| 191 |
-
prompt_2=
|
| 192 |
prompt_3=prompt,
|
| 193 |
negative_prompt=negative_prompt,
|
| 194 |
guidance_scale=guidance_scale,
|
|
@@ -213,13 +230,13 @@ def infer(
|
|
| 213 |
upload_to_ftp(latent_path)
|
| 214 |
#refiner.scheduler.set_timesteps(num_inference_steps,device)
|
| 215 |
refine = refiner(
|
| 216 |
-
prompt=f"{
|
| 217 |
negative_prompt = negative_prompt,
|
| 218 |
guidance_scale=7.5,
|
| 219 |
num_inference_steps=num_inference_steps,
|
| 220 |
image=sd_image,
|
| 221 |
generator=generator,
|
| 222 |
-
).images[0]
|
| 223 |
refine_path = f"sd35m_refine_{seed}.png"
|
| 224 |
refine.save(refine_path,optimize=False,compress_level=0)
|
| 225 |
upload_to_ftp(refine_path)
|
|
|
|
| 53 |
|
| 54 |
checkpoint = "microsoft/Phi-3.5-mini-instruct"
|
| 55 |
#vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)
|
| 56 |
+
vae = AutoencoderKL.from_pretrained("ford442/sdxl-vae-bf16", torch_dtype=torch.bfloat16).to(torch.device("cuda:0"))
|
| 57 |
+
#vae = AutoencoderKL.from_pretrained("ford442/sdxl-vae-bf16")
|
| 58 |
|
| 59 |
pipe = StableDiffusion3Pipeline.from_pretrained("ford442/stable-diffusion-3.5-medium-bf16", torch_dtype=torch.bfloat16).to(torch.device("cuda:0"))
|
| 60 |
#pipe = StableDiffusion3Pipeline.from_pretrained("ford442/stable-diffusion-3.5-medium-bf16").to(torch.device("cuda:0"))
|
|
|
|
| 131 |
user_prompt_rewrite = (
|
| 132 |
"Rewrite this prompt to be more descriptive and detailed and only return the rewritten text: "
|
| 133 |
)
|
| 134 |
+
user_prompt_rewrite_2 = (
|
| 135 |
+
"Rephrase this scene to have more elaborate details: "
|
| 136 |
+
)
|
| 137 |
input_text = f"{system_prompt_rewrite} {user_prompt_rewrite} {prompt}"
|
| 138 |
+
input_text_2 = f"{system_prompt_rewrite} {user_prompt_rewrite_2} {prompt}"
|
| 139 |
print("-- got prompt --")
|
| 140 |
# Encode the input text and include the attention mask
|
| 141 |
+
encoded_inputs = tokenizer(input_text, return_tensors="pt", return_attention_mask=True)
|
| 142 |
+
encoded_inputs_2 = tokenizer(input_text_2, return_tensors="pt", return_attention_mask=True)
|
|
|
|
| 143 |
# Ensure all values are on the correct device
|
| 144 |
input_ids = encoded_inputs["input_ids"].to(device)
|
| 145 |
+
input_ids_2 = encoded_inputs_2["input_ids"].to(device)
|
| 146 |
attention_mask = encoded_inputs["attention_mask"].to(device)
|
| 147 |
+
attention_mask_2 = encoded_inputs_2["attention_mask"].to(device)
|
| 148 |
print("-- tokenize prompt --")
|
| 149 |
# Google T5
|
| 150 |
#input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to("cuda")
|
|
|
|
| 156 |
top_p=0.9,
|
| 157 |
do_sample=True,
|
| 158 |
)
|
| 159 |
+
outputs_2 = model.generate(
|
| 160 |
+
input_ids=input_ids_2,
|
| 161 |
+
attention_mask=attention_mask_2,
|
| 162 |
+
max_new_tokens=65,
|
| 163 |
+
temperature=0.2,
|
| 164 |
+
top_p=0.9,
|
| 165 |
+
do_sample=True,
|
| 166 |
+
)
|
| 167 |
# Decode the generated token IDs back into text, dropping special tokens
|
| 168 |
enhanced_prompt = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
| 169 |
+
enhanced_prompt_2 = tokenizer.decode(outputs_2[0], skip_special_tokens=True)
|
| 170 |
print('-- generated prompt --')
|
| 171 |
enhanced_prompt = filter_text(enhanced_prompt,prompt)
|
| 172 |
+
enhanced_prompt_2 = filter_text(enhanced_prompt_2,prompt)
|
| 173 |
print('-- filtered prompt --')
|
| 174 |
print(enhanced_prompt)
|
| 175 |
+
print('-- filtered prompt 2 --')
|
| 176 |
+
print(enhanced_prompt_2)
|
| 177 |
if latent_file: # Check if a latent file is provided
|
| 178 |
# initial_latents = pipe.prepare_latents(
|
| 179 |
# batch_size=1,
|
|
|
|
| 205 |
with torch.no_grad():
|
| 206 |
sd_image = pipe(
|
| 207 |
prompt=enhanced_prompt, # This conversion is fine
|
| 208 |
+
prompt_2=enhanced_prompt_2,
|
| 209 |
prompt_3=prompt,
|
| 210 |
negative_prompt=negative_prompt,
|
| 211 |
guidance_scale=guidance_scale,
|
|
|
|
| 230 |
upload_to_ftp(latent_path)
|
| 231 |
#refiner.scheduler.set_timesteps(num_inference_steps,device)
|
| 232 |
refine = refiner(
|
| 233 |
+
prompt=f"{enhanced_prompt_2}, high quality masterpiece, complex details",
|
| 234 |
negative_prompt = negative_prompt,
|
| 235 |
guidance_scale=7.5,
|
| 236 |
num_inference_steps=num_inference_steps,
|
| 237 |
image=sd_image,
|
| 238 |
generator=generator,
|
| 239 |
+
).images[0]
|
| 240 |
refine_path = f"sd35m_refine_{seed}.png"
|
| 241 |
refine.save(refine_path,optimize=False,compress_level=0)
|
| 242 |
upload_to_ftp(refine_path)
|