Upload modeling_borealis.py with huggingface_hub
Browse files — modeling_borealis.py: +8 −0
modeling_borealis.py
CHANGED
|
@@ -276,6 +276,14 @@ class BorealisForConditionalGeneration(PreTrainedModel):
|
|
| 276 | llm_config = Qwen3Config.from_pretrained(config.llm_model_name)
| 277 | model.llm = Qwen3ForCausalLM(llm_config)
| 278 |
| 279 | # Load LLM weights from checkpoint
| 280 | llm_state = {
| 281 |     k.replace("llm.", ""): v
|
|
|
|
| 276 | llm_config = Qwen3Config.from_pretrained(config.llm_model_name)
| 277 | model.llm = Qwen3ForCausalLM(llm_config)
| 278 |
| 279 | + # Get vocab size from checkpoint
| 280 | + embed_weight = state_dict.get("llm.model.embed_tokens.weight")
| 281 | + if embed_weight is not None:
| 282 | +     checkpoint_vocab_size = embed_weight.shape[0]
| 283 | +     if checkpoint_vocab_size != llm_config.vocab_size:
| 284 | +         print(f"Resizing embeddings: {llm_config.vocab_size} -> {checkpoint_vocab_size}")
| 285 | +         model.llm.resize_token_embeddings(checkpoint_vocab_size)
| 286 | +
| 287 | # Load LLM weights from checkpoint
| 288 | llm_state = {
| 289 |     k.replace("llm.", ""): v