llm001 L04 Continue: 1 epoch
Browse files- README.md +5 -3
- chat_template.jinja +8 -0
- config.json +1 -1
- generation_config.json +1 -1
- model.safetensors +1 -1
- tokenizer.json +0 -0
- training_args.bin +1 -1
README.md
CHANGED
|
@@ -12,13 +12,15 @@ tags:
|
|
| 12 |
|
| 13 |
# complexly/olmo3-190m-zh-continue
|
| 14 |
|
| 15 |
-
持续预训练版本:基于 complexly/olmo3-190m-zh-
|
| 16 |
-
|
| 17 |
|
| 18 |
## 训练配置
|
| 19 |
-
- 数据:
|
|
|
|
| 20 |
- LR:2e-4(低 LR 防止灾难性遗忘)
|
| 21 |
- Warmup:10%
|
|
|
|
| 22 |
|
| 23 |
## 用法
|
| 24 |
```python
|
|
|
|
| 12 |
|
| 13 |
# complexly/olmo3-190m-zh-continue
|
| 14 |
|
| 15 |
+
持续预训练版本:基于 complexly/olmo3-190m-zh-full,在42ailab/llm101-v3.1-data数据上继续训练,增强对事实和逻辑的掌握。
|
| 16 |
+
训练完成后,training loss 从 3.19 降到 2.60 左右,eval loss 约为 1.84。
|
| 17 |
|
| 18 |
## 训练配置
|
| 19 |
+
- 数据:42ailab/llm101-v3.1-data/full_v31.bin
|
| 20 |
+
- GPU:A800,集群环境为 Slurm + Apptainer 容器
|
| 21 |
- LR:2e-4(低 LR 防止灾难性遗忘)
|
| 22 |
- Warmup:10%
|
| 23 |
+
- max_steps=-1, bs=25×3=75
|
| 24 |
|
| 25 |
## 用法
|
| 26 |
```python
|
chat_template.jinja
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{% for message in messages %}{% if message['role'] == 'system' %}<|im_start|>system
|
| 2 |
+
{{ message['content'] }}<|im_end|>
|
| 3 |
+
{% elif message['role'] == 'user' %}<|im_start|>user
|
| 4 |
+
{{ message['content'] }}<|im_end|>
|
| 5 |
+
{% elif message['role'] == 'assistant' %}{% generation %}<|im_start|>assistant
|
| 6 |
+
{{ message['content'] }}<|im_end|>
|
| 7 |
+
{% endgeneration %}{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant
|
| 8 |
+
{% endif %}
|
config.json
CHANGED
|
@@ -38,7 +38,7 @@
|
|
| 38 |
},
|
| 39 |
"sliding_window": 4096,
|
| 40 |
"tie_word_embeddings": false,
|
| 41 |
-
"transformers_version": "5.
|
| 42 |
"use_cache": false,
|
| 43 |
"vocab_size": 48000
|
| 44 |
}
|
|
|
|
| 38 |
},
|
| 39 |
"sliding_window": 4096,
|
| 40 |
"tie_word_embeddings": false,
|
| 41 |
+
"transformers_version": "5.8.0",
|
| 42 |
"use_cache": false,
|
| 43 |
"vocab_size": 48000
|
| 44 |
}
|
generation_config.json
CHANGED
|
@@ -7,6 +7,6 @@
|
|
| 7 |
"output_attentions": false,
|
| 8 |
"output_hidden_states": false,
|
| 9 |
"pad_token_id": 1,
|
| 10 |
-
"transformers_version": "5.
|
| 11 |
"use_cache": true
|
| 12 |
}
|
|
|
|
| 7 |
"output_attentions": false,
|
| 8 |
"output_hidden_states": false,
|
| 9 |
"pad_token_id": 1,
|
| 10 |
+
"transformers_version": "5.8.0",
|
| 11 |
"use_cache": true
|
| 12 |
}
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 374038864
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6617b7319b15f332f78f6e24f1300d3aaedad66c257bd0ece037287b56c30175
|
| 3 |
size 374038864
|
tokenizer.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4856
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7cf7fe3cb996093a1ad560ec66fd4b45a19dd4673a00019e66c1297954c6fc9c
|
| 3 |
size 4856
|