leonMW committed on
Commit
55ecd98
·
verified ·
1 Parent(s): ede287f

Merged basemodel Qwen2ForCausalLM(

Browse files

(model): Qwen2Model(
(embed_tokens): Embedding(151936, 1536)
(layers): ModuleList(
(0-27): 28 x Qwen2DecoderLayer(
(self_attn): Qwen2Attention(
(q_proj): Linear(in_features=1536, out_features=1536, bias=True)
(k_proj): Linear(in_features=1536, out_features=256, bias=True)
(v_proj): Linear(in_features=1536, out_features=256, bias=True)
(o_proj): Linear(in_features=1536, out_features=1536, bias=False)
)
(mlp): Qwen2MLP(
(gate_proj): Linear(in_features=1536, out_features=8960, bias=False)
(up_proj): Linear(in_features=1536, out_features=8960, bias=False)
(down_proj): Linear(in_features=8960, out_features=1536, bias=False)
(act_fn): SiLU()
)
(input_layernorm): Qwen2RMSNorm((1536,), eps=1e-06)
(post_attention_layernorm): Qwen2RMSNorm((1536,), eps=1e-06)
)
)
(norm): Qwen2RMSNorm((1536,), eps=1e-06)
(rotary_emb): Qwen2RotaryEmbedding()
)
(lm_head): Linear(in_features=1536, out_features=151936, bias=False)
) with LoRA adapter leonMW/DeepSeek-R1-Distill-Qwen-1.5B-LORA-GSPO-Basic using revision main

Files changed (3) hide show
  1. config.json +2 -2
  2. generation_config.json +1 -1
  3. model.safetensors +1 -1
config.json CHANGED
@@ -4,6 +4,7 @@
4
  ],
5
  "attention_dropout": 0.0,
6
  "bos_token_id": 151643,
 
7
  "eos_token_id": 151643,
8
  "hidden_act": "silu",
9
  "hidden_size": 1536,
@@ -50,8 +51,7 @@
50
  "rope_theta": 10000,
51
  "sliding_window": null,
52
  "tie_word_embeddings": false,
53
- "torch_dtype": "bfloat16",
54
- "transformers_version": "4.55.4",
55
  "use_cache": true,
56
  "use_mrope": false,
57
  "use_sliding_window": false,
 
4
  ],
5
  "attention_dropout": 0.0,
6
  "bos_token_id": 151643,
7
+ "dtype": "bfloat16",
8
  "eos_token_id": 151643,
9
  "hidden_act": "silu",
10
  "hidden_size": 1536,
 
51
  "rope_theta": 10000,
52
  "sliding_window": null,
53
  "tie_word_embeddings": false,
54
+ "transformers_version": "4.56.0",
 
55
  "use_cache": true,
56
  "use_mrope": false,
57
  "use_sliding_window": false,
generation_config.json CHANGED
@@ -5,5 +5,5 @@
5
  "eos_token_id": 151643,
6
  "temperature": 0.6,
7
  "top_p": 0.95,
8
- "transformers_version": "4.55.4"
9
  }
 
5
  "eos_token_id": 151643,
6
  "temperature": 0.6,
7
  "top_p": 0.95,
8
+ "transformers_version": "4.56.0"
9
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd9c77c3fdf9e370011657943233f1c1587683ab49afbdc1f65a23d80be19dc3
3
  size 3554214752
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f463ca8c6a75e4a6cacf4f64f0ca918d583501151c9fda98c0a3e83e0bba45d
3
  size 3554214752