kangdawei committed on
Commit
7fadf82
·
verified ·
1 Parent(s): 1a11888

End of training

Browse files
README.md CHANGED
@@ -1,17 +1,19 @@
1
  ---
2
  base_model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
 
3
  library_name: transformers
4
  model_name: MMR-DAPO-8B
5
  tags:
6
  - generated_from_trainer
7
- - trl
8
  - dapo
 
9
  licence: license
10
  ---
11
 
12
  # Model Card for MMR-DAPO-8B
13
 
14
- This model is a fine-tuned version of [deepseek-ai/DeepSeek-R1-Distill-Llama-8B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B).
15
  It has been trained using [TRL](https://github.com/huggingface/trl).
16
 
17
  ## Quick start
 
1
  ---
2
  base_model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
3
+ datasets: knoveleng/open-rs
4
  library_name: transformers
5
  model_name: MMR-DAPO-8B
6
  tags:
7
  - generated_from_trainer
8
+ - open-r1
9
  - dapo
10
+ - trl
11
  licence: license
12
  ---
13
 
14
  # Model Card for MMR-DAPO-8B
15
 
16
+ This model is a fine-tuned version of [deepseek-ai/DeepSeek-R1-Distill-Llama-8B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B) on the [knoveleng/open-rs](https://huggingface.co/datasets/knoveleng/open-rs) dataset.
17
  It has been trained using [TRL](https://github.com/huggingface/trl).
18
 
19
  ## Quick start
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a7ee9e7c2eac5c0ab901dc35136afccb7eb66edc9f7f816cc94694bcb485013e
3
- size 335605144
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60d95b10b6e140a9626a7058d5038528f2ff80148dc4569b881db56052046509
3
+ size 40
config.json CHANGED
@@ -18,6 +18,7 @@
18
  "num_attention_heads": 32,
19
  "num_hidden_layers": 32,
20
  "num_key_value_heads": 8,
 
21
  "pretraining_tp": 1,
22
  "rms_norm_eps": 1e-05,
23
  "rope_scaling": {
 
18
  "num_attention_heads": 32,
19
  "num_hidden_layers": 32,
20
  "num_key_value_heads": 8,
21
+ "pad_token_id": 128001,
22
  "pretraining_tp": 1,
23
  "rms_norm_eps": 1e-05,
24
  "rope_scaling": {
generation_config.json CHANGED
@@ -2,7 +2,10 @@
2
  "_from_model_config": true,
3
  "bos_token_id": 128000,
4
  "do_sample": true,
5
- "eos_token_id": 128001,
 
 
 
6
  "temperature": 0.6,
7
  "top_p": 0.95,
8
  "transformers_version": "4.57.1"
 
2
  "_from_model_config": true,
3
  "bos_token_id": 128000,
4
  "do_sample": true,
5
+ "eos_token_id": [
6
+ 128001
7
+ ],
8
+ "pad_token_id": 128001,
9
  "temperature": 0.6,
10
  "top_p": 0.95,
11
  "transformers_version": "4.57.1"
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:97504d83122d8a5af4731acb252a8ae3d76320c82c8a97903d88d7060c7a5cff
3
  size 4976698672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1eca2039ae98b02e4e986fe99b77cf09ba82239af50647213557c772569ac16
3
  size 4976698672
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d0c2d1260b05799ef7bcf55090c254386a948c8614aba988e9f9eaf26a62e4b
3
  size 4999802720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a3e5defd741abb945d6cb57fb16a6c217d3e25b8f6b24699bf4562e3e70e941
3
  size 4999802720
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:841941aaccd3b48608fedd022dbd51b52dd7a5942dc1e3975a6c29bfe1d294d7
3
  size 4915916176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:646f6506185a597664a44ee0b3c4be1a268e5d190edef360e04873ce2e184fa4
3
  size 4915916176
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e58645ff3a4e6e9eefd80ba8c2141b54fdf054de516f9e3f39a8e46fc290829f
3
  size 1168138808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45c0acc80f6fb1339bdc54f358f36c236ae73b2243c74d02197d51b3f37ff741
3
  size 1168138808