Spaces:

HSinghHuggingFace
/

stable-diffusion-image-generator

Sleeping

App Files Files Community

HSinghHuggingFace committed on Feb 26, 2025

Commit

a04f2f2

1 Parent(s): 17131ca

Fix token embedding size mismatch

Browse files

Files changed (1) hide show

src/utils/style_generator.py +24 -14

src/utils/style_generator.py CHANGED Viewed

@@ -77,6 +77,7 @@ class StyleTransfer:
         # Get the expected dimension from the text encoder
         expected_dim = self.pipeline.text_encoder.get_input_embeddings().weight.shape[1]
         current_dim = embeds.shape[0]
         # Resize embeddings if dimensions don't match
@@ -85,7 +86,8 @@ class StyleTransfer:
             if current_dim > expected_dim:
                 embeds = embeds[:expected_dim]
             else:
-                embeds = torch.cat([embeds, torch.zeros(expected_dim - current_dim)], dim=0)
         # Reshape to match expected dimensions
         embeds = embeds.unsqueeze(0)  # Add batch dimension
@@ -94,16 +96,21 @@ class StyleTransfer:
         dtype = self.pipeline.text_encoder.get_input_embeddings().weight.dtype
         embeds = embeds.to(dtype)
-        # Add the token in tokenizer
         token = token if token is not None else trained_token
-        self.pipeline.tokenizer.add_tokens(token)
-        # Resize the token embeddings
-        self.pipeline.text_encoder.resize_token_embeddings(len(self.pipeline.tokenizer))
-        # Get the id for the token and assign the embeds
-        token_id = self.pipeline.tokenizer.convert_tokens_to_ids(token)
-        self.pipeline.text_encoder.get_input_embeddings().weight.data[token_id] = embeds[0]
         return token
     def generate_artwork(self, prompt, selected_style):
@@ -157,20 +164,23 @@ class StyleTransfer:
                 loss = self._calculate_color_distance(latents_copy)
                 # Compute gradients
-                if loss.requires_grad:
                     grads = torch.autograd.grad(
                         outputs=loss,
                         inputs=latents_copy,
                         allow_unused=True,
                         retain_graph=False
-                    )[0]
-                    if grads is not None:
-                        # Apply gradients to original latents
-                        return latents - 0.1 * grads.detach()
             except Exception as e:
                 print(f"Error in color enhancement: {e}")
         return latents

         # Get the expected dimension from the text encoder
         expected_dim = self.pipeline.text_encoder.get_input_embeddings().weight.shape[1]
+        vocab_size = self.pipeline.text_encoder.get_input_embeddings().weight.shape[0]
         current_dim = embeds.shape[0]
         # Resize embeddings if dimensions don't match
             if current_dim > expected_dim:
                 embeds = embeds[:expected_dim]
             else:
+                padding = torch.zeros(expected_dim - current_dim, device=embeds.device, dtype=embeds.dtype)
+                embeds = torch.cat([embeds, padding], dim=0)
         # Reshape to match expected dimensions
         embeds = embeds.unsqueeze(0)  # Add batch dimension
         dtype = self.pipeline.text_encoder.get_input_embeddings().weight.dtype
         embeds = embeds.to(dtype)
+        # Add the token in tokenizer and handle embedding resize
         token = token if token is not None else trained_token
+        num_added_tokens = self.pipeline.tokenizer.add_tokens(token)
+        if num_added_tokens > 0:
+            # Safely resize token embeddings
+            self.pipeline.text_encoder.resize_token_embeddings(len(self.pipeline.tokenizer))
+            # Get the id for the token and assign the embeds
+            token_id = self.pipeline.tokenizer.convert_tokens_to_ids(token)
+            if token_id < self.pipeline.text_encoder.get_input_embeddings().weight.shape[0]:
+                self.pipeline.text_encoder.get_input_embeddings().weight.data[token_id] = embeds
+            else:
+                print(f"Warning: Token ID {token_id} is out of bounds. Skipping embedding assignment.")
         return token
     def generate_artwork(self, prompt, selected_style):
                 loss = self._calculate_color_distance(latents_copy)
                 # Compute gradients
+                if loss is not None and loss.requires_grad:
                     grads = torch.autograd.grad(
                         outputs=loss,
                         inputs=latents_copy,
                         allow_unused=True,
                         retain_graph=False
+                    )
+                    if grads and grads[0] is not None:
+                        # Apply gradients to original latents with safety checks
+                        grad_tensor = grads[0].detach()
+                        if grad_tensor.shape == latents.shape:
+                            return latents - 0.1 * grad_tensor
             except Exception as e:
                 print(f"Error in color enhancement: {e}")
+                # Continue without enhancement on error
         return latents