kangdawei committed
Commit 927a04d · verified · 1 Parent(s): adecf51

End of training

README.md CHANGED
@@ -1,17 +1,19 @@
 ---
 base_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
+datasets: knoveleng/open-rs
 library_name: transformers
 model_name: MMR-Sigmoid-DAPO-7B
 tags:
 - generated_from_trainer
-- trl
+- open-r1
 - dapo
+- trl
 licence: license
 ---
 
 # Model Card for MMR-Sigmoid-DAPO-7B
 
-This model is a fine-tuned version of [deepseek-ai/DeepSeek-R1-Distill-Qwen-7B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B).
+This model is a fine-tuned version of [deepseek-ai/DeepSeek-R1-Distill-Qwen-7B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B) on the [knoveleng/open-rs](https://huggingface.co/datasets/knoveleng/open-rs) dataset.
 It has been trained using [TRL](https://github.com/huggingface/trl).
 
 ## Quick start
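The diff truncates at the README's "Quick start" heading. A minimal usage sketch in the spirit of TRL's standard model cards, assuming the model is published under the hub repo id `kangdawei/MMR-Sigmoid-DAPO-7B` (the repo id itself does not appear in the diff):

```python
from transformers import pipeline

# Repo id is assumed from the committer and model name; only the model
# name "MMR-Sigmoid-DAPO-7B" appears in the diff itself.
generator = pipeline("text-generation", model="kangdawei/MMR-Sigmoid-DAPO-7B")

output = generator("What is 17 * 24?", max_new_tokens=128)
print(output[0]["generated_text"])
```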
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f73ce5bb2236b4cdce3e19a4b94c89f63a08ff478d69f9c59c55273bb37335ba
-size 323014560
+oid sha256:60d95b10b6e140a9626a7058d5038528f2ff80148dc4569b881db56052046509
+size 40
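The adapter file shrinks from roughly 323 MB to 40 bytes. Forty bytes is exactly the size of a safetensors file that stores no tensors at all (an 8-byte header length plus the 32-byte metadata JSON `{"__metadata__":{"format":"pt"}}`), so the new object is likely an empty placeholder. A quick way to check, assuming the file has been downloaded locally:

```python
from safetensors import safe_open

# Inspect the replaced adapter file (local path assumed after download).
with safe_open("adapter_model.safetensors", framework="pt") as f:
    print(list(f.keys()))  # an empty list means no adapter weights were saved
```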
config.json CHANGED
@@ -3,7 +3,7 @@
     "Qwen2ForCausalLM"
   ],
   "attention_dropout": 0.0,
-  "bos_token_id": 151643,
+  "bos_token_id": 151646,
   "dtype": "bfloat16",
   "eos_token_id": 151643,
   "hidden_act": "silu",
@@ -46,6 +46,7 @@
   "num_attention_heads": 28,
   "num_hidden_layers": 28,
   "num_key_value_heads": 4,
+  "pad_token_id": 151643,
   "rms_norm_eps": 1e-06,
   "rope_scaling": null,
   "rope_theta": 10000,
generation_config.json CHANGED
@@ -2,7 +2,10 @@
   "_from_model_config": true,
   "bos_token_id": 151646,
   "do_sample": true,
-  "eos_token_id": 151643,
+  "eos_token_id": [
+    151643
+  ],
+  "pad_token_id": 151643,
   "temperature": 0.6,
   "top_p": 0.95,
   "transformers_version": "4.57.1"
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:abb5dbf948772449c6d7308911db9995e65427350d89c564eb62d309161f6724
+oid sha256:9142bf7a9d8f65d00b8c23a51d0c3aa94fee7236d399f19edcd0b0f5ffbd3194
 size 4877660776
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:89bcf86a06534b25c0267c30660f5990afd20d5759c346bdff9301b9ef7f7c93
+oid sha256:28f25564a2353df691bf70435186f1470af42384e753840a1c7dc3be9e4970b5
 size 4932751008
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:86a270e9eb2a3777089e8dde6727e8d7e7f1adfb8809c44a4f01326e33e9479e
+oid sha256:0bb46f5dc66b5f9fc8ad7c1ead1771218ca8ba5e52901927da1a8a66e05022e5
 size 4330865200