import torch.nn as nn
from transformers import PreTrainedModel
from transformers.modeling_outputs import CausalLMOutputWithPast
from transformers.models.qwen3.modeling_qwen3 import Qwen3Config, Qwen3Model


class Dolphy1ForCausalLM(PreTrainedModel):
    """Causal language model built on the Qwen3 backbone.

    Wraps ``Qwen3Model`` and adds the language-modeling head that the
    original version of this class was missing: without ``lm_head`` the
    model could not emit vocabulary logits, ``generate()`` could not run,
    and any ``lm_head.weight`` present in a saved checkpoint was silently
    discarded at load time.

    Any extra submodules saved on the backbone (e.g. a router, per the
    original author's note) are still loaded automatically by
    ``from_pretrained`` — nothing needs to be reattached here.
    """

    config_class = Qwen3Config
    # NOTE(review): Qwen3 checkpoints commonly tie lm_head to the input
    # embeddings; listing the key lets HF's tying machinery handle it
    # when config.tie_word_embeddings is set — confirm against the
    # actual checkpoint config.
    _tied_weights_keys = ["lm_head.weight"]

    def __init__(self, config):
        super().__init__(config)
        self.model = Qwen3Model(config)
        # FIX: the LM head was absent in the original class definition.
        self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
        # post_init() applies HF weight initialization and performs
        # weight tying according to config.tie_word_embeddings; the
        # original never called it.
        self.post_init()

    # Embedding accessors are required by PreTrainedModel utilities
    # (resize_token_embeddings, weight tying).
    def get_input_embeddings(self):
        return self.model.embed_tokens

    def set_input_embeddings(self, value):
        self.model.embed_tokens = value

    def get_output_embeddings(self):
        return self.lm_head

    def set_output_embeddings(self, new_embeddings):
        self.lm_head = new_embeddings

    def forward(self, input_ids, attention_mask=None, labels=None, **kwargs):
        """Run the backbone and project hidden states to vocabulary logits.

        Args:
            input_ids: Token id tensor, shape ``(batch, seq_len)``.
            attention_mask: Optional mask, same leading shape as
                ``input_ids``; forwarded to the backbone unchanged.
            labels: Optional target ids for LM training. When given, a
                standard next-token cross-entropy loss is computed
                (logits shifted left, labels shifted right).
            **kwargs: Passed through to ``Qwen3Model.forward`` (e.g.
                ``past_key_values``, ``use_cache``).

        Returns:
            ``CausalLMOutputWithPast`` with ``logits`` of shape
            ``(batch, seq_len, vocab_size)`` and ``loss`` when ``labels``
            was provided.
        """
        outputs = self.model(input_ids=input_ids, attention_mask=attention_mask, **kwargs)
        hidden_states = outputs[0]
        logits = self.lm_head(hidden_states)

        loss = None
        if labels is not None:
            # Causal-LM shift: position t predicts token t+1.
            shift_logits = logits[..., :-1, :].contiguous()
            shift_labels = labels[..., 1:].contiguous()
            loss = nn.functional.cross_entropy(
                shift_logits.view(-1, shift_logits.size(-1)),
                shift_labels.view(-1),
            )

        return CausalLMOutputWithPast(
            loss=loss,
            logits=logits,
            past_key_values=getattr(outputs, "past_key_values", None),
            hidden_states=getattr(outputs, "hidden_states", None),
            attentions=getattr(outputs, "attentions", None),
        )