leonMW committed
Commit ede287f · verified · 1 Parent(s): bdf1c39

Merged base model Qwen2ForCausalLM(

  (model): Qwen2Model(
    (embed_tokens): Embedding(151936, 1536)
    (layers): ModuleList(
      (0-27): 28 x Qwen2DecoderLayer(
        (self_attn): Qwen2Attention(
          (q_proj): Linear(in_features=1536, out_features=1536, bias=True)
          (k_proj): Linear(in_features=1536, out_features=256, bias=True)
          (v_proj): Linear(in_features=1536, out_features=256, bias=True)
          (o_proj): Linear(in_features=1536, out_features=1536, bias=False)
        )
        (mlp): Qwen2MLP(
          (gate_proj): Linear(in_features=1536, out_features=8960, bias=False)
          (up_proj): Linear(in_features=1536, out_features=8960, bias=False)
          (down_proj): Linear(in_features=8960, out_features=1536, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): Qwen2RMSNorm((1536,), eps=1e-06)
        (post_attention_layernorm): Qwen2RMSNorm((1536,), eps=1e-06)
      )
    )
    (norm): Qwen2RMSNorm((1536,), eps=1e-06)
    (rotary_emb): Qwen2RotaryEmbedding()
  )
  (lm_head): Linear(in_features=1536, out_features=151936, bias=False)
) with LoRA adapter leonMW/DeepSeek-R1-Distill-Qwen-1.5B-LORA-GSPO-Basic using revision main
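A merge like the one described in this commit message is typically produced with the peft merge-and-unload workflow. The sketch below is an assumption about how this checkpoint could be reproduced, not the exact script used here: the base checkpoint name deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B and the output directory are inferred from the adapter name and the Qwen2 architecture printed above, while the adapter ID and revision main come from the commit message.

# Minimal sketch: fold a LoRA adapter into its base model with peft.
# Assumption: the base checkpoint is deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
# (inferred from the adapter name; the commit only shows the printed
# Qwen2ForCausalLM architecture, not the base model ID).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

BASE_ID = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"  # assumed base checkpoint
ADAPTER_ID = "leonMW/DeepSeek-R1-Distill-Qwen-1.5B-LORA-GSPO-Basic"  # from the commit message

# Load the Qwen2ForCausalLM base model.
base = AutoModelForCausalLM.from_pretrained(BASE_ID, torch_dtype=torch.bfloat16)

# Attach the LoRA adapter at revision "main", as stated in the commit message.
model = PeftModel.from_pretrained(base, ADAPTER_ID, revision="main")

# Merge the LoRA weights into the base weights and drop the adapter wrappers,
# leaving a plain Qwen2ForCausalLM.
merged = model.merge_and_unload()

# Save the merged weights; safe_serialization=True writes safetensors output
# (a single model.safetensors shard at this model size).
merged.save_pretrained("merged-model", safe_serialization=True)
AutoTokenizer.from_pretrained(BASE_ID).save_pretrained("merged-model")

Uploading the resulting model.safetensors is what replaces the LFS pointer shown in the diff below.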

Files changed (1)
model.safetensors +1 -1
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:93f25b67480c67967bd5127d084c9ac1a2c8575ecfe20a92ebfaf3cc29dc46ac
+oid sha256:fd9c77c3fdf9e370011657943233f1c1587683ab49afbdc1f65a23d80be19dc3
 size 3554214752