complexly committed on
Commit
5727d10
·
verified ·
1 Parent(s): 0fc302e

llm001 L04 Continue: 1 epoch

Browse files
README.md CHANGED
@@ -12,13 +12,15 @@ tags:
12
 
13
  # complexly/olmo3-190m-zh-continue
14
 
15
- 持续预训练版本:基于 complexly/olmo3-190m-zh-continue
16
- 在 cmz1024/llm101-olmo3-zh-demo-data/continue 数据上继续训练
17
 
18
  ## 训练配置
19
- - 数据:cmz1024/llm101-olmo3-zh-demo-data/continue/full_v31.bin
 
20
  - LR:2e-4(低 LR 防止灾难性遗忘)
21
  - Warmup:10%
 
22
 
23
  ## 用法
24
  ```python
 
12
 
13
  # complexly/olmo3-190m-zh-continue
14
 
15
+ 持续预训练版本:基于 complexly/olmo3-190m-zh-full,在 42ailab/llm101-v3.1-data 数据上继续训练,增强对事实和逻辑的掌握。
16
+ 训练完成后,training loss 从 3.19 降到 2.60 左右,eval loss 为 1.84 左右。
17
 
18
  ## 训练配置
19
+ - 数据:42ailab/llm101-v3.1-data/full_v31.bin
20
+ - GPU:A800,集群使用 Slurm + Apptainer 容器
21
  - LR:2e-4(低 LR 防止灾难性遗忘)
22
  - Warmup:10%
23
+ - max_steps=-1, bs=25×3=75
24
 
25
  ## 用法
26
  ```python
chat_template.jinja ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {% for message in messages %}{% if message['role'] == 'system' %}<|im_start|>system
2
+ {{ message['content'] }}<|im_end|>
3
+ {% elif message['role'] == 'user' %}<|im_start|>user
4
+ {{ message['content'] }}<|im_end|>
5
+ {% elif message['role'] == 'assistant' %}{% generation %}<|im_start|>assistant
6
+ {{ message['content'] }}<|im_end|>
7
+ {% endgeneration %}{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant
8
+ {% endif %}
config.json CHANGED
@@ -38,7 +38,7 @@
38
  },
39
  "sliding_window": 4096,
40
  "tie_word_embeddings": false,
41
- "transformers_version": "5.7.0",
42
  "use_cache": false,
43
  "vocab_size": 48000
44
  }
 
38
  },
39
  "sliding_window": 4096,
40
  "tie_word_embeddings": false,
41
+ "transformers_version": "5.8.0",
42
  "use_cache": false,
43
  "vocab_size": 48000
44
  }
generation_config.json CHANGED
@@ -7,6 +7,6 @@
7
  "output_attentions": false,
8
  "output_hidden_states": false,
9
  "pad_token_id": 1,
10
- "transformers_version": "5.7.0",
11
  "use_cache": true
12
  }
 
7
  "output_attentions": false,
8
  "output_hidden_states": false,
9
  "pad_token_id": 1,
10
+ "transformers_version": "5.8.0",
11
  "use_cache": true
12
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c8caf8327e55f7fdbb78a296f3002e835de995799b58da0aa81a8c5fccf4db7
3
  size 374038864
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6617b7319b15f332f78f6e24f1300d3aaedad66c257bd0ece037287b56c30175
3
  size 374038864
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f169b07007537609082901be7d085fee91c73bb725ab300982c8e1549f629e7f
3
  size 4856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cf7fe3cb996093a1ad560ec66fd4b45a19dd4673a00019e66c1297954c6fc9c
3
  size 4856