fix: call super()._init_weights() to match Qwen2Moe convention for transformers v5
Browse files — modeling_llada2_moe.py (+1 −0)
modeling_llada2_moe.py
CHANGED
|
@@ -687,6 +687,7 @@ class LLaDA2MoePreTrainedModel(PreTrainedModel):
|
|
| 687 |
_supports_cache_class = True
|
| 688 |
|
| 689 |
def _init_weights(self, module):
|
|
|
|
| 690 |
std = self.config.initializer_range
|
| 691 |
if isinstance(module, nn.Linear):
|
| 692 |
module.weight.data.normal_(mean=0.0, std=std)
|
|
|
|
| 687 |
_supports_cache_class = True
|
| 688 |
|
| 689 |
def _init_weights(self, module):
|
| 690 |
+
super()._init_weights(module)
|
| 691 |
std = self.config.initializer_range
|
| 692 |
if isinstance(module, nn.Linear):
|
| 693 |
module.weight.data.normal_(mean=0.0, std=std)
|