llm001 L04 Continue: 1 epoch

Files changed (7) hide show

README.md CHANGED Viewed

@@ -12,13 +12,15 @@ tags:
 # complexly/olmo3-190m-zh-continue
-持续预训练版本：基于 complexly/olmo3-190m-zh-continue，
-在 cmz1024/llm101-olmo3-zh-demo-data/continue 数据上继续训练。
 ## 训练配置
-- 数据：cmz1024/llm101-olmo3-zh-demo-data/continue/full_v31.bin
 - LR：2e-4（低 LR 防止灾难性遗忘）
 - Warmup：10%
 ## 用法
 ```python

 # complexly/olmo3-190m-zh-continue
+持续预训练版本：基于 complexly/olmo3-190m-zh-full，在 42ailab/llm101-v3.1-data 数据上继续训练，增强对事实和逻辑的掌握。
+训练完成后，training loss 从 3.19 降到 2.60 左右，eval loss 约为 1.84。
 ## 训练配置
+- 数据：42ailab/llm101-v3.1-data/full_v31.bin
+- GPU：A800，集群环境为 Slurm + Apptainer 容器
 - LR：2e-4（低 LR 防止灾难性遗忘）
 - Warmup：10%
+- max_steps=-1, bs=25×3=75
 ## 用法
 ```python

chat_template.jinja ADDED Viewed

+{% for message in messages %}{% if message['role'] == 'system' %}<|im_start|>system
+{{ message['content'] }}<|im_end|>
+{% elif message['role'] == 'user' %}<|im_start|>user
+{{ message['content'] }}<|im_end|>
+{% elif message['role'] == 'assistant' %}{% generation %}<|im_start|>assistant
+{{ message['content'] }}<|im_end|>
+{% endgeneration %}{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant
+{% endif %}

config.json CHANGED Viewed

@@ -38,7 +38,7 @@
   },
   "sliding_window": 4096,
   "tie_word_embeddings": false,
-  "transformers_version": "5.7.0",
   "use_cache": false,
   "vocab_size": 48000
 }

   },
   "sliding_window": 4096,
   "tie_word_embeddings": false,
+  "transformers_version": "5.8.0",
   "use_cache": false,
   "vocab_size": 48000
 }

generation_config.json CHANGED Viewed

@@ -7,6 +7,6 @@
   "output_attentions": false,
   "output_hidden_states": false,
   "pad_token_id": 1,
-  "transformers_version": "5.7.0",
   "use_cache": true
 }

   "output_attentions": false,
   "output_hidden_states": false,
   "pad_token_id": 1,
+  "transformers_version": "5.8.0",
   "use_cache": true
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7c8caf8327e55f7fdbb78a296f3002e835de995799b58da0aa81a8c5fccf4db7
 size 374038864

 version https://git-lfs.github.com/spec/v1
+oid sha256:6617b7319b15f332f78f6e24f1300d3aaedad66c257bd0ece037287b56c30175
 size 374038864

tokenizer.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f169b07007537609082901be7d085fee91c73bb725ab300982c8e1549f629e7f
 size 4856

 version https://git-lfs.github.com/spec/v1
+oid sha256:7cf7fe3cb996093a1ad560ec66fd4b45a19dd4673a00019e66c1297954c6fc9c
 size 4856