Merged base model Qwen2ForCausalLM(
(model): Qwen2Model(
(embed_tokens): Embedding(151936, 1536)
(layers): ModuleList(
(0-27): 28 x Qwen2DecoderLayer(
(self_attn): Qwen2Attention(
(q_proj): Linear(in_features=1536, out_features=1536, bias=True)
(k_proj): Linear(in_features=1536, out_features=256, bias=True)
(v_proj): Linear(in_features=1536, out_features=256, bias=True)
(o_proj): Linear(in_features=1536, out_features=1536, bias=False)
)
(mlp): Qwen2MLP(
(gate_proj): Linear(in_features=1536, out_features=8960, bias=False)
(up_proj): Linear(in_features=1536, out_features=8960, bias=False)
(down_proj): Linear(in_features=8960, out_features=1536, bias=False)
(act_fn): SiLU()
)
(input_layernorm): Qwen2RMSNorm((1536,), eps=1e-06)
(post_attention_layernorm): Qwen2RMSNorm((1536,), eps=1e-06)
)
)
(norm): Qwen2RMSNorm((1536,), eps=1e-06)
(rotary_emb): Qwen2RotaryEmbedding()
)
(lm_head): Linear(in_features=1536, out_features=151936, bias=False)
) with LoRA adapter leonMW/DeepSeek-R1-Distill-Qwen-1.5B-LORA-GSPO-Basic using revision main
- config.json +2 -2
- generation_config.json +1 -1
- model.safetensors +1 -1
|
@@ -4,6 +4,7 @@
|
|
| 4 |
],
|
| 5 |
"attention_dropout": 0.0,
|
| 6 |
"bos_token_id": 151643,
|
|
|
|
| 7 |
"eos_token_id": 151643,
|
| 8 |
"hidden_act": "silu",
|
| 9 |
"hidden_size": 1536,
|
|
@@ -50,8 +51,7 @@
|
|
| 50 |
"rope_theta": 10000,
|
| 51 |
"sliding_window": null,
|
| 52 |
"tie_word_embeddings": false,
|
| 53 |
-
"torch_dtype": "bfloat16",
|
| 54 |
-
"transformers_version": "4.55.4",
|
| 55 |
"use_cache": true,
|
| 56 |
"use_mrope": false,
|
| 57 |
"use_sliding_window": false,
|
|
|
|
| 4 |
],
|
| 5 |
"attention_dropout": 0.0,
|
| 6 |
"bos_token_id": 151643,
|
| 7 |
+
"dtype": "bfloat16",
|
| 8 |
"eos_token_id": 151643,
|
| 9 |
"hidden_act": "silu",
|
| 10 |
"hidden_size": 1536,
|
|
|
|
| 51 |
"rope_theta": 10000,
|
| 52 |
"sliding_window": null,
|
| 53 |
"tie_word_embeddings": false,
|
| 54 |
+
"transformers_version": "4.56.0",
|
|
|
|
| 55 |
"use_cache": true,
|
| 56 |
"use_mrope": false,
|
| 57 |
"use_sliding_window": false,
|
|
@@ -5,5 +5,5 @@
|
|
| 5 |
"eos_token_id": 151643,
|
| 6 |
"temperature": 0.6,
|
| 7 |
"top_p": 0.95,
|
| 8 |
-
"transformers_version": "4.55.4"
|
| 9 |
}
|
|
|
|
| 5 |
"eos_token_id": 151643,
|
| 6 |
"temperature": 0.6,
|
| 7 |
"top_p": 0.95,
|
| 8 |
+
"transformers_version": "4.56.0"
|
| 9 |
}
|
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 3554214752
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0f463ca8c6a75e4a6cacf4f64f0ca918d583501151c9fda98c0a3e83e0bba45d
|
| 3 |
size 3554214752
|