Upload folder using huggingface_hub
Browse files- .gitattributes +1 -0
- experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/checkpoints/epoch_19/added_tokens.json +5 -0
- experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/checkpoints/epoch_19/chat_template.jinja +6 -0
- experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/checkpoints/epoch_19/config.json +39 -0
- experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/checkpoints/epoch_19/generation_config.json +6 -0
- experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/checkpoints/epoch_19/lr_scheduler.pt +3 -0
- experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/checkpoints/epoch_19/merges.txt +0 -0
- experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/checkpoints/epoch_19/model.safetensors +3 -0
- experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/checkpoints/epoch_19/optimizer.pt +3 -0
- experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/checkpoints/epoch_19/projector_0.pt +3 -0
- experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/checkpoints/epoch_19/projector_1.pt +3 -0
- experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/checkpoints/epoch_19/projector_2.pt +3 -0
- experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/checkpoints/epoch_19/projector_3.pt +3 -0
- experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/checkpoints/epoch_19/projector_4.pt +3 -0
- experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/checkpoints/epoch_19/projector_5.pt +3 -0
- experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/checkpoints/epoch_19/projector_6.pt +3 -0
- experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/checkpoints/epoch_19/projector_7.pt +3 -0
- experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/checkpoints/epoch_19/special_tokens_map.json +14 -0
- experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/checkpoints/epoch_19/tokenizer.json +3 -0
- experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/checkpoints/epoch_19/tokenizer_config.json +47 -0
- experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/checkpoints/epoch_19/vocab.json +0 -0
- experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/tuandao_qwen1.5-1.8b_to_gpt2-120m.log +48 -0
- experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/tuandao_qwen1.5-1.8b_to_gpt2-120m_metrics.jsonl +0 -0
.gitattributes
CHANGED
|
@@ -146,3 +146,4 @@ experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/checkpoints/epoch_
|
|
| 146 |
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/checkpoints/epoch_16/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 147 |
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/checkpoints/epoch_17/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 148 |
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/checkpoints/epoch_18/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 146 |
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/checkpoints/epoch_16/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 147 |
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/checkpoints/epoch_17/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 148 |
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/checkpoints/epoch_18/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 149 |
+
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/checkpoints/epoch_19/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/checkpoints/epoch_19/added_tokens.json
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"<|endoftext|>": 151643,
|
| 3 |
+
"<|im_end|>": 151645,
|
| 4 |
+
"<|im_start|>": 151644
|
| 5 |
+
}
|
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/checkpoints/epoch_19/chat_template.jinja
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system
|
| 2 |
+
You are a helpful assistant<|im_end|>
|
| 3 |
+
' }}{% endif %}{{'<|im_start|>' + message['role'] + '
|
| 4 |
+
' + message['content'] + '<|im_end|>' + '
|
| 5 |
+
'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant
|
| 6 |
+
' }}{% endif %}
|
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/checkpoints/epoch_19/config.json
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"activation_function": "gelu_new",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"GPT2LMHeadModel"
|
| 5 |
+
],
|
| 6 |
+
"attn_pdrop": 0.1,
|
| 7 |
+
"bos_token_id": 50256,
|
| 8 |
+
"dtype": "float32",
|
| 9 |
+
"embd_pdrop": 0.1,
|
| 10 |
+
"eos_token_id": 50256,
|
| 11 |
+
"initializer_range": 0.02,
|
| 12 |
+
"layer_norm_epsilon": 1e-05,
|
| 13 |
+
"model_type": "gpt2",
|
| 14 |
+
"n_ctx": 1024,
|
| 15 |
+
"n_embd": 768,
|
| 16 |
+
"n_head": 12,
|
| 17 |
+
"n_inner": null,
|
| 18 |
+
"n_layer": 12,
|
| 19 |
+
"n_positions": 1024,
|
| 20 |
+
"pad_token_id": 50256,
|
| 21 |
+
"reorder_and_upcast_attn": false,
|
| 22 |
+
"resid_pdrop": 0.1,
|
| 23 |
+
"scale_attn_by_inverse_layer_idx": false,
|
| 24 |
+
"scale_attn_weights": true,
|
| 25 |
+
"summary_activation": null,
|
| 26 |
+
"summary_first_dropout": 0.1,
|
| 27 |
+
"summary_proj_to_labels": true,
|
| 28 |
+
"summary_type": "cls_index",
|
| 29 |
+
"summary_use_proj": true,
|
| 30 |
+
"task_specific_params": {
|
| 31 |
+
"text-generation": {
|
| 32 |
+
"do_sample": true,
|
| 33 |
+
"max_length": 50
|
| 34 |
+
}
|
| 35 |
+
},
|
| 36 |
+
"transformers_version": "4.56.0",
|
| 37 |
+
"use_cache": true,
|
| 38 |
+
"vocab_size": 50257
|
| 39 |
+
}
|
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/checkpoints/epoch_19/generation_config.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_from_model_config": true,
|
| 3 |
+
"bos_token_id": 50256,
|
| 4 |
+
"eos_token_id": 50256,
|
| 5 |
+
"transformers_version": "4.56.0"
|
| 6 |
+
}
|
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/checkpoints/epoch_19/lr_scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4dd12d1ba8559b15b6467eff107c00b9a21fd7e64d4653ab460d73f1e4ea9467
|
| 3 |
+
size 1483
|
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/checkpoints/epoch_19/merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/checkpoints/epoch_19/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c9a29904bbe34a5e1426b821debb738d218e113f29d39e8e4a7b9bb093dff7c1
|
| 3 |
+
size 497774208
|
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/checkpoints/epoch_19/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:76e9e64aaea96d48de6480f44bc014be0cd20fa6537779df9af071d3f11576e9
|
| 3 |
+
size 1096368715
|
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/checkpoints/epoch_19/projector_0.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dff0a1b67645b44261ef49e5787de3ec9cf9b56663657ecd56f0ba3fdc15952b
|
| 3 |
+
size 6296517
|
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/checkpoints/epoch_19/projector_1.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8dea6d992f91c883d7185395bb16553f677abbb827705fad1c5fa7f307f966f7
|
| 3 |
+
size 6296517
|
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/checkpoints/epoch_19/projector_2.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7b1f71d183a67517c5671aa2b4e2e83b8f21296f884ac53fa14675102c401862
|
| 3 |
+
size 6296517
|
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/checkpoints/epoch_19/projector_3.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:86637d792b6849ae808c4e3fbd3fa357a0243d3df11a3f99d9de086663390412
|
| 3 |
+
size 6296517
|
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/checkpoints/epoch_19/projector_4.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3262b20802339e4d328f148bcc06b35c049ef8c94471d2f4e37fabf4adebcc75
|
| 3 |
+
size 6296517
|
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/checkpoints/epoch_19/projector_5.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c75d83f29a47d7a8f174819e690527d32654f1a6b1ce9ad9d886c0536b7aefaf
|
| 3 |
+
size 6296517
|
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/checkpoints/epoch_19/projector_6.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9daf055b5091cd4d357ac8f2efa32067f76fc561ac41d6d574bbaabb42a704e1
|
| 3 |
+
size 6296517
|
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/checkpoints/epoch_19/projector_7.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a9c53fde7a7b1f7d086ed5666c393e3ab5c26404388ca31bb1051f5964b538bc
|
| 3 |
+
size 6296517
|
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/checkpoints/epoch_19/special_tokens_map.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"additional_special_tokens": [
|
| 3 |
+
"<|im_start|>",
|
| 4 |
+
"<|im_end|>"
|
| 5 |
+
],
|
| 6 |
+
"eos_token": {
|
| 7 |
+
"content": "<|endoftext|>",
|
| 8 |
+
"lstrip": false,
|
| 9 |
+
"normalized": false,
|
| 10 |
+
"rstrip": false,
|
| 11 |
+
"single_word": false
|
| 12 |
+
},
|
| 13 |
+
"pad_token": "<|endoftext|>"
|
| 14 |
+
}
|
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/checkpoints/epoch_19/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7c6f32fb0a832e7efb2c2de5e805c8aaaf43e933c191ffc8d7cb56b176e0f11b
|
| 3 |
+
size 11418364
|
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/checkpoints/epoch_19/tokenizer_config.json
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_prefix_space": false,
|
| 3 |
+
"added_tokens_decoder": {
|
| 4 |
+
"151643": {
|
| 5 |
+
"content": "<|endoftext|>",
|
| 6 |
+
"lstrip": false,
|
| 7 |
+
"normalized": false,
|
| 8 |
+
"rstrip": false,
|
| 9 |
+
"single_word": false,
|
| 10 |
+
"special": true
|
| 11 |
+
},
|
| 12 |
+
"151644": {
|
| 13 |
+
"content": "<|im_start|>",
|
| 14 |
+
"lstrip": false,
|
| 15 |
+
"normalized": false,
|
| 16 |
+
"rstrip": false,
|
| 17 |
+
"single_word": false,
|
| 18 |
+
"special": true
|
| 19 |
+
},
|
| 20 |
+
"151645": {
|
| 21 |
+
"content": "<|im_end|>",
|
| 22 |
+
"lstrip": false,
|
| 23 |
+
"normalized": false,
|
| 24 |
+
"rstrip": false,
|
| 25 |
+
"single_word": false,
|
| 26 |
+
"special": true
|
| 27 |
+
}
|
| 28 |
+
},
|
| 29 |
+
"additional_special_tokens": [
|
| 30 |
+
"<|im_start|>",
|
| 31 |
+
"<|im_end|>"
|
| 32 |
+
],
|
| 33 |
+
"bos_token": null,
|
| 34 |
+
"clean_up_tokenization_spaces": false,
|
| 35 |
+
"eos_token": "<|endoftext|>",
|
| 36 |
+
"errors": "replace",
|
| 37 |
+
"extra_special_tokens": {},
|
| 38 |
+
"max_length": 256,
|
| 39 |
+
"model_max_length": 32768,
|
| 40 |
+
"pad_token": "<|endoftext|>",
|
| 41 |
+
"split_special_tokens": false,
|
| 42 |
+
"stride": 0,
|
| 43 |
+
"tokenizer_class": "Qwen2Tokenizer",
|
| 44 |
+
"truncation_side": "right",
|
| 45 |
+
"truncation_strategy": "longest_first",
|
| 46 |
+
"unk_token": null
|
| 47 |
+
}
|
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/checkpoints/epoch_19/vocab.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/tuandao_qwen1.5-1.8b_to_gpt2-120m.log
CHANGED
|
@@ -1012,3 +1012,51 @@
|
|
| 1012 |
2025-11-30 16:12:29,508 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 1013 |
2025-11-30 16:12:30,027 - root - [32m[1mINFO[0m - Step 27265/28600 train rougeL: 1.0
|
| 1014 |
2025-11-30 16:12:30,090 - root - [32m[1mINFO[0m - Step 27265/28600 loss: 0.6465226411819458, nll_loss: 0.006603807210922241, distill_loss: 0.3199594020843506
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1012 |
2025-11-30 16:12:29,508 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 1013 |
2025-11-30 16:12:30,027 - root - [32m[1mINFO[0m - Step 27265/28600 train rougeL: 1.0
|
| 1014 |
2025-11-30 16:12:30,090 - root - [32m[1mINFO[0m - Step 27265/28600 loss: 0.6465226411819458, nll_loss: 0.006603807210922241, distill_loss: 0.3199594020843506
|
| 1015 |
+
2025-11-30 16:13:11,040 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 1016 |
+
2025-11-30 16:13:12,944 - root - [32m[1mINFO[0m - Step 27393/28600 train rougeL: 0.9423669467787115
|
| 1017 |
+
2025-11-30 16:13:13,007 - root - [32m[1mINFO[0m - Step 27393/28600 loss: 0.7532645463943481, nll_loss: 0.016324713826179504, distill_loss: 0.3684699237346649
|
| 1018 |
+
2025-11-30 16:13:54,498 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 1019 |
+
2025-11-30 16:13:55,185 - root - [32m[1mINFO[0m - Step 27521/28600 train rougeL: 1.0
|
| 1020 |
+
2025-11-30 16:13:55,247 - root - [32m[1mINFO[0m - Step 27521/28600 loss: 0.6677549481391907, nll_loss: 0.00863515306264162, distill_loss: 0.32955989241600037
|
| 1021 |
+
2025-11-30 16:14:36,622 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 1022 |
+
2025-11-30 16:14:37,835 - root - [32m[1mINFO[0m - Step 27649/28600 train rougeL: 1.0
|
| 1023 |
+
2025-11-30 16:14:37,897 - root - [32m[1mINFO[0m - Step 27649/28600 loss: 0.6685707569122314, nll_loss: 0.021448642015457153, distill_loss: 0.32356104254722595
|
| 1024 |
+
2025-11-30 16:15:18,918 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 1025 |
+
2025-11-30 16:15:19,342 - root - [32m[1mINFO[0m - Step 27777/28600 train rougeL: 1.0
|
| 1026 |
+
2025-11-30 16:15:19,404 - root - [32m[1mINFO[0m - Step 27777/28600 loss: 0.6504983901977539, nll_loss: 0.006399303674697876, distill_loss: 0.3220495283603668
|
| 1027 |
+
2025-11-30 16:16:00,394 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 1028 |
+
2025-11-30 16:16:02,356 - root - [32m[1mINFO[0m - Step 27905/28600 train rougeL: 0.809881208171401
|
| 1029 |
+
2025-11-30 16:16:02,420 - root - [32m[1mINFO[0m - Step 27905/28600 loss: 0.7603797912597656, nll_loss: 0.02054585888981819, distill_loss: 0.36991697549819946
|
| 1030 |
+
2025-11-30 16:16:43,365 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 1031 |
+
2025-11-30 16:16:45,164 - root - [32m[1mINFO[0m - Step 28033/28600 train rougeL: 0.9436945990517419
|
| 1032 |
+
2025-11-30 16:16:45,226 - root - [32m[1mINFO[0m - Step 28033/28600 loss: 0.7420259714126587, nll_loss: 0.017242711037397385, distill_loss: 0.3623916208744049
|
| 1033 |
+
2025-11-30 16:17:26,568 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 1034 |
+
2025-11-30 16:17:28,259 - root - [32m[1mINFO[0m - Step 28161/28600 train rougeL: 1.0
|
| 1035 |
+
2025-11-30 16:17:28,322 - root - [32m[1mINFO[0m - Step 28161/28600 loss: 0.739902675151825, nll_loss: 0.02427525632083416, distill_loss: 0.3578137159347534
|
| 1036 |
+
2025-11-30 16:18:09,262 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 1037 |
+
2025-11-30 16:18:11,090 - root - [32m[1mINFO[0m - Step 28289/28600 train rougeL: 0.9519543973941368
|
| 1038 |
+
2025-11-30 16:18:11,153 - root - [32m[1mINFO[0m - Step 28289/28600 loss: 0.7623029351234436, nll_loss: 0.014688455499708652, distill_loss: 0.37380725145339966
|
| 1039 |
+
2025-11-30 16:18:52,249 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 1040 |
+
2025-11-30 16:18:53,637 - root - [32m[1mINFO[0m - Step 28417/28600 train rougeL: 1.0
|
| 1041 |
+
2025-11-30 16:18:53,700 - root - [32m[1mINFO[0m - Step 28417/28600 loss: 0.6988828182220459, nll_loss: 0.013126135803759098, distill_loss: 0.3428783416748047
|
| 1042 |
+
2025-11-30 16:19:34,627 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 1043 |
+
2025-11-30 16:19:35,562 - root - [32m[1mINFO[0m - Step 28545/28600 train rougeL: 1.0
|
| 1044 |
+
2025-11-30 16:19:35,624 - root - [32m[1mINFO[0m - Step 28545/28600 loss: 0.6725203394889832, nll_loss: 0.014981920830905437, distill_loss: 0.3287692070007324
|
| 1045 |
+
2025-11-30 16:19:52,933 - root - [32m[1mINFO[0m - Epoch 20/20 finished
|
| 1046 |
+
2025-11-30 16:19:52,951 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 1047 |
+
2025-11-30 16:19:55,450 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 1048 |
+
2025-11-30 16:19:57,953 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 1049 |
+
2025-11-30 16:20:00,425 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 1050 |
+
2025-11-30 16:20:02,916 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 1051 |
+
2025-11-30 16:20:05,435 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 1052 |
+
2025-11-30 16:20:07,859 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 1053 |
+
2025-11-30 16:20:10,358 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 1054 |
+
2025-11-30 16:20:12,855 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 1055 |
+
2025-11-30 16:20:15,229 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 1056 |
+
2025-11-30 16:20:17,704 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 1057 |
+
2025-11-30 16:20:20,190 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 1058 |
+
2025-11-30 16:20:22,166 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 1059 |
+
2025-11-30 16:20:24,692 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 1060 |
+
2025-11-30 16:20:27,160 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 1061 |
+
2025-11-30 16:20:29,634 - absl - [32m[1mINFO[0m - Using default tokenizer.
|
| 1062 |
+
2025-11-30 16:20:31,697 - root - [32m[1mINFO[0m - Epoch 20/20 eval rougeL: 0.24898792831535368
|
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251130_132733/tuandao_qwen1.5-1.8b_to_gpt2-120m_metrics.jsonl
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|