Tom Aarsen committed
Commit c4c1b0e
Parent(s): d71d855
Attempt to fix meta tensor loading error
modeling_splade.py  +22 -2

modeling_splade.py  CHANGED
@@ -3,6 +3,26 @@ This file exists solely to allow loading the Qwen3ForCausalLM via the AutoModelF
 Compared to standard Qwen3, we're using bidirectional attention and not causal attention, but it's specified
 with `is_causal=False` in the config.
 """
-from transformers import Qwen3ForCausalLM
 
-
+from transformers import Qwen3ForCausalLM as _Qwen3ForCausalLM
+
+
+class Qwen3ForCausalLM(_Qwen3ForCausalLM):
+    def tie_weights(self, *args, **kwargs):
+        """Explicitly re-tie lm_head to embed_tokens to hopefully avoid meta tensor errors."""
+        super().tie_weights(*args, **kwargs)
+        if (
+            self.config.tie_word_embeddings
+            and hasattr(self, "lm_head")
+            and hasattr(self, "model")
+        ):
+            self.lm_head.weight = self.model.embed_tokens.weight
+
+    def _init_weights(self, module):
+        """Skip lm_head init when it will be tied to embed_tokens later."""
+        if module is getattr(self, "lm_head", None) and self.config.tie_word_embeddings:
+            return
+        super()._init_weights(module)
+
+
+__all__ = ["Qwen3ForCausalLM"]
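A minimal way to sanity-check the override, sketched under a few assumptions: the script runs next to modeling_splade.py so the module is importable, the checkpoint's config has `tie_word_embeddings=True`, and "path/to/checkpoint" is a placeholder rather than a real repo id.

    from modeling_splade import Qwen3ForCausalLM

    # Placeholder path; point this at the actual SPLADE checkpoint directory.
    model = Qwen3ForCausalLM.from_pretrained("path/to/checkpoint")

    # After the overridden tie_weights(), lm_head should share its parameter
    # with the input embeddings ...
    assert model.lm_head.weight is model.model.embed_tokens.weight

    # ... and that parameter should be materialized rather than left on the
    # meta device, which is what the loading error was about.
    assert not model.lm_head.weight.is_meta

If either assertion fails, the checkpoint was loaded without the re-tying taking effect (for example with `tie_word_embeddings=False`), so this sketch only applies to tied configurations.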