Merged base model Qwen2ForCausalLM(

(model): Qwen2Model(
(embed_tokens): Embedding(152064, 3584)
(layers): ModuleList(
(0-27): 28 x Qwen2DecoderLayer(
(self_attn): Qwen2Attention(
(q_proj): Linear(in_features=3584, out_features=3584, bias=True)
(k_proj): Linear(in_features=3584, out_features=512, bias=True)
(v_proj): Linear(in_features=3584, out_features=512, bias=True)
(o_proj): Linear(in_features=3584, out_features=3584, bias=False)
)
(mlp): Qwen2MLP(
(gate_proj): Linear(in_features=3584, out_features=18944, bias=False)
(up_proj): Linear(in_features=3584, out_features=18944, bias=False)
(down_proj): Linear(in_features=18944, out_features=3584, bias=False)
(act_fn): SiLU()
)
(input_layernorm): Qwen2RMSNorm((3584,), eps=1e-06)
(post_attention_layernorm): Qwen2RMSNorm((3584,), eps=1e-06)
)
)
(norm): Qwen2RMSNorm((3584,), eps=1e-06)
(rotary_emb): Qwen2RotaryEmbedding()
)
(lm_head): Linear(in_features=3584, out_features=152064, bias=False)
) with LoRA adapter leonMW/DeepSeek-R1-Distill-Qwen-7B-LORA-GSPO-Basic using revision main

Files changed (5)

config.json +2 -2
generation_config.json +1 -1
model-00001-of-00004.safetensors +1 -1
model-00002-of-00004.safetensors +1 -1
model-00003-of-00004.safetensors +1 -1

config.json CHANGED

@@ -4,6 +4,7 @@
   ],
   "attention_dropout": 0.0,
   "bos_token_id": 151643,
   "eos_token_id": 151643,
   "hidden_act": "silu",
   "hidden_size": 3584,
@@ -50,8 +51,7 @@
   "rope_theta": 10000,
   "sliding_window": null,
   "tie_word_embeddings": false,
-  "torch_dtype": "bfloat16",
-  "transformers_version": "4.55.4",
   "use_cache": true,
   "use_mrope": false,
   "use_sliding_window": false,

   ],
   "attention_dropout": 0.0,
   "bos_token_id": 151643,
+  "dtype": "bfloat16",
   "eos_token_id": 151643,
   "hidden_act": "silu",
   "hidden_size": 3584,
   "rope_theta": 10000,
   "sliding_window": null,
   "tie_word_embeddings": false,
+  "transformers_version": "4.56.0",
   "use_cache": true,
   "use_mrope": false,
   "use_sliding_window": false,

generation_config.json CHANGED

@@ -5,5 +5,5 @@
   "eos_token_id": 151643,
   "temperature": 0.6,
   "top_p": 0.95,
-  "transformers_version": "4.55.4"
 }

   "eos_token_id": 151643,
   "temperature": 0.6,
   "top_p": 0.95,
+  "transformers_version": "4.56.0"
 }

model-00001-of-00004.safetensors CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:804ca83c274abb422e0f9da5b34b43c79674a7efceceed080a5030fe2ab6f01b
 size 4877660776

 version https://git-lfs.github.com/spec/v1
+oid sha256:746eb7bb9e6c52913108c513e8f0bc6b60677a3676f028be5a45b888f35c4768
 size 4877660776

model-00002-of-00004.safetensors CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6f98e8bcfb146bba58592e1d3251d5b10991a256d77a5d256d4ef423e8aae99e
 size 4932751008

 version https://git-lfs.github.com/spec/v1
+oid sha256:dc044b2fc471bbcab21394b4d6c47e0db845c7f9855e061852a7761d5dd82803
 size 4932751008

model-00003-of-00004.safetensors CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c1b9d2cd56088c00c9088b45358a47143bb5473667b4aa9cdf4e63cbd803f1c6
 size 4330865200

 version https://git-lfs.github.com/spec/v1
+oid sha256:06172024ed14497c4c853a040fd8f16f4fd176dc38669095fcf5567c9b52a7d0
 size 4330865200