import torch.nn as nn
from transformers import PreTrainedModel
from transformers.modeling_outputs import CausalLMOutputWithPast
from transformers.models.qwen3.modeling_qwen3 import Qwen3Config, Qwen3Model


class Dolphy1ForCausalLM(PreTrainedModel):
    """Causal language model built on the Qwen3 backbone.

    Wraps ``Qwen3Model`` and adds the language-modeling head that the
    original version of this class was missing: without ``lm_head`` the
    model could not emit vocabulary logits, ``generate()`` could not run,
    and any ``lm_head.weight`` present in a saved checkpoint was silently
    discarded at load time.

    Any extra submodules saved on the backbone (e.g. a router, per the
    original author's note) are still loaded automatically by
    ``from_pretrained`` — nothing needs to be reattached here.
    """

    config_class = Qwen3Config
    # NOTE(review): Qwen3 checkpoints commonly tie lm_head to the input
    # embeddings; listing the key lets HF's tying machinery handle it
    # when config.tie_word_embeddings is set — confirm against the
    # actual checkpoint config.
    _tied_weights_keys = ["lm_head.weight"]

    def __init__(self, config):
        super().__init__(config)
        self.model = Qwen3Model(config)
        # FIX: the LM head was absent in the original class definition.
        self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
        # post_init() applies HF weight initialization and performs
        # weight tying according to config.tie_word_embeddings; the
        # original never called it.
        self.post_init()

    # Embedding accessors are required by PreTrainedModel utilities
    # (resize_token_embeddings, weight tying).
    def get_input_embeddings(self):
        return self.model.embed_tokens

    def set_input_embeddings(self, value):
        self.model.embed_tokens = value

    def get_output_embeddings(self):
        return self.lm_head

    def set_output_embeddings(self, new_embeddings):
        self.lm_head = new_embeddings

    def forward(self, input_ids, attention_mask=None, labels=None, **kwargs):
        """Run the backbone and project hidden states to vocabulary logits.

        Args:
            input_ids: Token id tensor, shape ``(batch, seq_len)``.
            attention_mask: Optional mask, same leading shape as
                ``input_ids``; forwarded to the backbone unchanged.
            labels: Optional target ids for LM training. When given, a
                standard next-token cross-entropy loss is computed
                (logits shifted left, labels shifted right).
            **kwargs: Passed through to ``Qwen3Model.forward`` (e.g.
                ``past_key_values``, ``use_cache``).

        Returns:
            ``CausalLMOutputWithPast`` with ``logits`` of shape
            ``(batch, seq_len, vocab_size)`` and ``loss`` when ``labels``
            was provided.
        """
        outputs = self.model(input_ids=input_ids, attention_mask=attention_mask, **kwargs)
        hidden_states = outputs[0]
        logits = self.lm_head(hidden_states)

        loss = None
        if labels is not None:
            # Causal-LM shift: position t predicts token t+1.
            shift_logits = logits[..., :-1, :].contiguous()
            shift_labels = labels[..., 1:].contiguous()
            loss = nn.functional.cross_entropy(
                shift_logits.view(-1, shift_logits.size(-1)),
                shift_labels.view(-1),
            )

        return CausalLMOutputWithPast(
            loss=loss,
            logits=logits,
            past_key_values=getattr(outputs, "past_key_values", None),
            hidden_states=getattr(outputs, "hidden_states", None),
            attentions=getattr(outputs, "attentions", None),
        )