Upload modeling_borealis.py with huggingface_hub
Browse files — modeling_borealis.py: +8 −0
modeling_borealis.py
CHANGED
|
@@ -276,6 +276,14 @@ class BorealisForConditionalGeneration(PreTrainedModel):
|
|
| 276 | llm_config = Qwen3Config.from_pretrained(config.llm_model_name)
| 277 | model.llm = Qwen3ForCausalLM(llm_config)
| 278 |
| 279 | # Load LLM weights from checkpoint
| 280 | llm_state = {
| 281 |     k.replace("llm.", ""): v
|
|
|
|
| 276 | llm_config = Qwen3Config.from_pretrained(config.llm_model_name)
| 277 | model.llm = Qwen3ForCausalLM(llm_config)
| 278 |
| 279 | + # Get vocab size from checkpoint
| 280 | + embed_weight = state_dict.get("llm.model.embed_tokens.weight")
| 281 | + if embed_weight is not None:
| 282 | +     checkpoint_vocab_size = embed_weight.shape[0]
| 283 | +     if checkpoint_vocab_size != llm_config.vocab_size:
| 284 | +         print(f"Resizing embeddings: {llm_config.vocab_size} -> {checkpoint_vocab_size}")
| 285 | +         model.llm.resize_token_embeddings(checkpoint_vocab_size)
| 286 | +
| 287 | # Load LLM weights from checkpoint
| 288 | llm_state = {
| 289 |     k.replace("llm.", ""): v