"""
This file exists solely to allow loading Qwen3ForCausalLM via the AutoModelForMaskedLM class.
Compared to standard Qwen3 we use bidirectional rather than causal attention, which is
specified via `is_causal=False` in the config.
"""
| from transformers import Qwen3ForCausalLM as _Qwen3ForCausalLM | |
class Qwen3ForCausalLM(_Qwen3ForCausalLM):
    """Qwen3 wrapper that forcibly re-ties ``lm_head`` to the input embeddings.

    Some loading paths (e.g. meta-device / low-memory loading) can leave
    ``lm_head`` untied after the base class's tying step; this subclass
    re-points it at ``model.embed_tokens`` and skips redundant ``lm_head``
    initialization when tying is requested.
    """

    def tie_weights(self, *args, **kwargs):
        """Explicitly re-tie lm_head to embed_tokens to avoid meta tensor errors.

        Runs the standard HF tying first, then forces the tie whenever the
        config requests tied word embeddings and both modules actually exist.
        """
        super().tie_weights(*args, **kwargs)
        if not getattr(self.config, "tie_word_embeddings", False):
            return
        # Guard every attribute access: a partially-initialized model may lack
        # lm_head, model, or model.embed_tokens (the original code checked
        # hasattr(self, "model") but then dereferenced model.embed_tokens
        # unguarded, which could raise AttributeError).
        lm_head = getattr(self, "lm_head", None)
        embed_tokens = getattr(getattr(self, "model", None), "embed_tokens", None)
        if lm_head is not None and embed_tokens is not None:
            lm_head.weight = embed_tokens.weight

    def _init_weights(self, module):
        """Skip lm_head init when it will be tied to embed_tokens later."""
        if module is getattr(self, "lm_head", None) and getattr(
            self.config, "tie_word_embeddings", False
        ):
            return
        super()._init_weights(module)
# Explicit public API: only the patched wrapper class is exported.
__all__ = ["Qwen3ForCausalLM"]