Spaces:

dezzman
/

diffusion_models

Running

App Files Files Community

dezzman committed on Feb 2, 2025

Commit

c9f36bf

verified ·

1 Parent(s): 3ca8699

Update app.py

Browse files

Files changed (1) hide show

app.py +61 -4

app.py CHANGED Viewed

@@ -27,6 +27,9 @@ def get_lora_sd_pipeline(
     pipe = StableDiffusionPipeline.from_pretrained(base_model_name_or_path, torch_dtype=dtype)
     pipe.unet = PeftModel.from_pretrained(pipe.unet, unet_sub_dir, adapter_name=adapter_name)
     if os.path.exists(text_encoder_sub_dir):
         pipe.text_encoder = PeftModel.from_pretrained(
@@ -36,9 +39,52 @@ def get_lora_sd_pipeline(
     if dtype in (torch.float16, torch.bfloat16):
         pipe.unet.half()
         pipe.text_encoder.half()
     return pipe
 # Use the GPU when torch reports CUDA as available; otherwise use the CPU.
 device = "cuda" if torch.cuda.is_available() else "cpu"
 # Model id the inference code compares the requested model_id against.
 model_id_default = "CompVis/stable-diffusion-v1-4"
@@ -76,8 +122,8 @@ def infer(
     generator = torch.Generator().manual_seed(seed)
     params = {
-        'prompt': prompt,
-        'negative_prompt': negative_prompt,
         'guidance_scale': guidance_scale,
         'num_inference_steps': num_inference_steps,
         'width': width,
@@ -88,9 +134,20 @@ def infer(
     if model_id != model_id_default:
         pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch_dtype)
         pipe = pipe.to(device)
-        pipe.fuse_lora(lora_scale=lora_scale)
         image = pipe(**params).images[0]
     else:
         pipe_default.fuse_lora(lora_scale=lora_scale)
         image = pipe_default(**params).images[0]

     pipe = StableDiffusionPipeline.from_pretrained(base_model_name_or_path, torch_dtype=dtype)
     pipe.unet = PeftModel.from_pretrained(pipe.unet, unet_sub_dir, adapter_name=adapter_name)
+    print(os.path.exists(unet_sub_dir))
+    print(unet_sub_dir)
+    print(dtype)
     if os.path.exists(text_encoder_sub_dir):
         pipe.text_encoder = PeftModel.from_pretrained(
     if dtype in (torch.float16, torch.bfloat16):
         pipe.unet.half()
         pipe.text_encoder.half()
     return pipe
def split_prompt(prompt, tokenizer, max_length=77):
    """Tokenize ``prompt`` and split the token ids into fixed-size chunks.

    Args:
        prompt: Text handed to ``tokenizer``.
        tokenizer: Callable invoked as ``tokenizer(prompt, truncation=False)``
            whose result exposes the token ids under ``"input_ids"``.
        max_length: Maximum number of token ids per chunk; must be positive.

    Returns:
        A list of consecutive slices of the token ids, each at most
        ``max_length`` long (only the last one may be shorter). An empty
        token sequence yields an empty list.

    Raises:
        ValueError: If ``max_length`` is not positive.
    """
    if max_length <= 0:
        # A negative step makes ``range`` empty, which would silently drop the
        # whole prompt; zero would fail with an unrelated ``range`` error.
        raise ValueError(f"max_length must be positive, got {max_length}")
    # truncation=False asks the tokenizer not to cut the prompt, so the
    # chunking below is what limits the length of each piece.
    tokens = tokenizer(prompt, truncation=False)["input_ids"]
    return [
        tokens[start:start + max_length]
        for start in range(0, len(tokens), max_length)
    ]
def get_prompt_embeds(prompt_chunks, text_encoder):
    """Encode every token chunk and concatenate the results along dim 1.

    Args:
        prompt_chunks: Non-empty sequence of token-id lists, one per chunk.
        text_encoder: Callable with a ``device`` attribute. It is called with
            a tensor holding a single chunk (leading dimension of size 1) and
            the first element of its output is taken as the embeddings.

    Returns:
        The per-chunk embeddings joined with ``torch.cat(..., dim=1)``.

    Raises:
        ValueError: If ``prompt_chunks`` is empty.
    """
    if not prompt_chunks:
        # torch.cat rejects an empty list with an opaque error; fail early
        # with a message that points at the actual cause.
        raise ValueError("prompt_chunks must contain at least one chunk")

    target_device = text_encoder.device
    # Inference only: gradients are disabled once for the whole loop.
    with torch.no_grad():
        chunk_embeds = [
            # Build the index tensor directly on the encoder's device
            # instead of creating it on the CPU and copying it over.
            text_encoder(torch.tensor([chunk], device=target_device))[0]
            for chunk in prompt_chunks
        ]
    return torch.cat(chunk_embeds, dim=1)
def shape_alignment(prompt_embeds, negative_prompt_embeds):
    """Zero-pad two embedding tensors along dim 1 so their lengths match.

    Args:
        prompt_embeds: 3-D tensor; dim 1 is the one that gets padded.
        negative_prompt_embeds: 3-D tensor padded the same way.

    Returns:
        A ``(prompt_embeds, negative_prompt_embeds)`` tuple in which the
        shorter tensor has been extended with zeros at the end of dim 1.
        Each tensor keeps its own dtype and device.

    Raises:
        AssertionError: If the shapes still differ after padding (i.e. the
            inputs disagree on dim 0 or dim 2).
    """
    target_length = max(prompt_embeds.shape[1], negative_prompt_embeds.shape[1])

    def pad_to_max_length(tensor, target_length):
        """Append zeros along dim 1 until ``tensor`` has ``target_length``."""
        missing = target_length - tensor.shape[1]
        if missing <= 0:
            return tensor
        # new_zeros inherits dtype and device from ``tensor``. A plain
        # torch.zeros would be float32 and make torch.cat promote half /
        # bfloat16 embeddings to float32, leaving the two outputs with
        # different dtypes.
        filler = tensor.new_zeros((tensor.shape[0], missing, tensor.shape[2]))
        return torch.cat([tensor, filler], dim=1)

    prompt_embeds = pad_to_max_length(prompt_embeds, target_length)
    negative_prompt_embeds = pad_to_max_length(negative_prompt_embeds, target_length)
    # Post-condition check: padding only fixes dim 1.
    assert prompt_embeds.shape == negative_prompt_embeds.shape, "Shapes do not match!"
    return prompt_embeds, negative_prompt_embeds
def prompts_embeddings(prompt, negative_promt, tokenizer, text_encoder):
    """Build equally shaped embeddings for a prompt and a negative prompt.

    Both texts are tokenized and chunked with ``split_prompt``, encoded with
    ``get_prompt_embeds`` and finally padded to a common length with
    ``shape_alignment``.

    Args:
        prompt: Positive prompt text.
        negative_promt: Negative prompt text. The parameter keeps its
            original spelling so existing keyword callers keep working.
        tokenizer: Tokenizer forwarded to ``split_prompt``.
        text_encoder: Encoder forwarded to ``get_prompt_embeds``.

    Returns:
        A ``(prompt_embeds, negative_prompt_embeds)`` tuple as returned by
        ``shape_alignment``.
    """
    prompt_chunks = split_prompt(prompt, tokenizer)
    # Use the function's own argument: the previous body referenced the
    # undefined name ``negative_prompt`` and so never used what was passed in.
    negative_prompt_chunks = split_prompt(negative_promt, tokenizer)

    prompt_embeds = get_prompt_embeds(prompt_chunks, text_encoder)
    negative_prompt_embeds = get_prompt_embeds(negative_prompt_chunks, text_encoder)

    return shape_alignment(prompt_embeds, negative_prompt_embeds)
 # Use the GPU when torch reports CUDA as available; otherwise use the CPU.
 device = "cuda" if torch.cuda.is_available() else "cpu"
 # Model id the inference code compares the requested model_id against;
 # when they are equal, `pipe_default` is used instead of loading a new pipeline.
 model_id_default = "CompVis/stable-diffusion-v1-4"
     generator = torch.Generator().manual_seed(seed)
     params = {
+        # 'prompt': prompt,
+        # 'negative_prompt': negative_prompt,
         'guidance_scale': guidance_scale,
         'num_inference_steps': num_inference_steps,
         'width': width,
     if model_id != model_id_default:
         pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch_dtype)
         pipe = pipe.to(device)
         image = pipe(**params).images[0]
     else:
+        print('----')
+        print(lora_scale)
+        print(prompt)
+        print(negative_prompt)
+        prompt_embeds, negative_prompt_embeds = prompts_embeddings(
+            prompt,
+            negative_prompt,
+            pipe_default.tokenizer,
+            pipe_default.text_encoder
+            )
+        params['prompt_embeds'] = prompt_embeds
+        params['negative_prompt_embeds']=negative_prompt_embeds
         pipe_default.fuse_lora(lora_scale=lora_scale)
         image = pipe_default(**params).images[0]