"""
This file exists solely to allow loading the Qwen3ForCausalLM via the AutoModelForMaskedLM class.
Compared to standard Qwen3, the model uses bidirectional rather than causal attention; this is selected
with `is_causal=False` in the config.
"""

from transformers import Qwen3ForCausalLM as _Qwen3ForCausalLM


class Qwen3ForCausalLM(_Qwen3ForCausalLM):
    """Qwen3 causal-LM subclass with explicit handling of tied output weights.

    Two hooks of the parent class are overridden:

    * ``tie_weights`` re-assigns ``lm_head.weight`` to the input embedding
      weight after the parent's own tying has run.
    * ``_init_weights`` leaves ``lm_head`` untouched when
      ``config.tie_word_embeddings`` is set.
    """

    def tie_weights(self, *args, **kwargs):
        """Tie weights via the parent, then re-point ``lm_head`` at ``embed_tokens``.

        The explicit re-assignment is a defensive step intended to avoid
        meta tensor errors. All arguments are forwarded unchanged to the
        parent implementation.
        """
        super().tie_weights(*args, **kwargs)

        # Nothing more to do when the config does not request tied embeddings.
        if not self.config.tie_word_embeddings:
            return

        # Both ends of the tie must exist on this instance.
        if not (hasattr(self, "lm_head") and hasattr(self, "model")):
            return

        # Share the very same parameter object between input and output layers.
        self.lm_head.weight = self.model.embed_tokens.weight

    def _init_weights(self, module):
        """Initialise ``module`` via the parent, except for a tied ``lm_head``.

        When ``module`` is this model's ``lm_head`` and
        ``config.tie_word_embeddings`` is truthy, initialisation is skipped
        because the head is going to be tied to ``embed_tokens`` afterwards.
        """
        head = getattr(self, "lm_head", None)
        # Identity test first so the config is only consulted for the head itself.
        skip_init = module is head and self.config.tie_word_embeddings
        if not skip_init:
            super()._init_weights(module)


# Public API: only the subclass defined in this module is exported; the
# upstream class is imported under the private alias `_Qwen3ForCausalLM`.
__all__ = ["Qwen3ForCausalLM"]