Upload folder using huggingface_hub
- experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251209_011341/checkpoints/epoch_12/config.json +39 -0
- experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251209_011341/checkpoints/epoch_12/generation_config.json +6 -0
- experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251209_011341/checkpoints/epoch_12/lr_scheduler.pt +3 -0
- experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251209_011341/checkpoints/epoch_12/merges.txt +0 -0
- experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251209_011341/checkpoints/epoch_12/model.safetensors +3 -0
- experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251209_011341/checkpoints/epoch_12/optimizer.pt +3 -0
- experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251209_011341/checkpoints/epoch_12/projector_0.pt +3 -0
- experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251209_011341/checkpoints/epoch_12/projector_1.pt +3 -0
- experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251209_011341/checkpoints/epoch_12/projector_2.pt +3 -0
- experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251209_011341/checkpoints/epoch_12/projector_3.pt +3 -0
- experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251209_011341/checkpoints/epoch_12/projector_4.pt +3 -0
- experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251209_011341/checkpoints/epoch_12/projector_5.pt +3 -0
- experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251209_011341/checkpoints/epoch_12/projector_6.pt +3 -0
- experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251209_011341/checkpoints/epoch_12/projector_7.pt +3 -0
- experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251209_011341/checkpoints/epoch_12/query_projector.pt +3 -0
- experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251209_011341/checkpoints/epoch_12/s2t_projector.pt +3 -0
- experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251209_011341/checkpoints/epoch_12/special_tokens_map.json +6 -0
- experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251209_011341/checkpoints/epoch_12/t2s_projector.pt +3 -0
- experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251209_011341/checkpoints/epoch_12/tokenizer.json +0 -0
- experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251209_011341/checkpoints/epoch_12/tokenizer_config.json +21 -0
- experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251209_011341/checkpoints/epoch_12/vocab.json +0 -0
- experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251209_011341/tuandao_qwen1.5-1.8b_to_gpt2-120m.log +52 -0
- experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251209_011341/tuandao_qwen1.5-1.8b_to_gpt2-120m_metrics.jsonl +0 -0
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251209_011341/checkpoints/epoch_12/config.json
ADDED
@@ -0,0 +1,39 @@
+{
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "dtype": "float32",
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 1024,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 1024,
+  "pad_token_id": 50256,
+  "reorder_and_upcast_attn": false,
+  "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50
+    }
+  },
+  "transformers_version": "4.56.0",
+  "use_cache": true,
+  "vocab_size": 50257
+}
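This config describes a stock GPT-2-small student (12 layers, 12 heads, 768-dim embeddings, 1024-token context), so the checkpoint loads with the standard transformers classes. A minimal loading sketch, assuming the epoch_12 directory has been fetched locally (the path below is illustrative):

```python
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Illustrative local path to the downloaded checkpoint directory.
ckpt_dir = "experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251209_011341/checkpoints/epoch_12"

# config.json, model.safetensors, and the tokenizer files in this commit are
# all that from_pretrained needs; the *.pt optimizer/projector files are
# extra training state that transformers ignores.
model = GPT2LMHeadModel.from_pretrained(ckpt_dir)
tokenizer = GPT2Tokenizer.from_pretrained(ckpt_dir)

inputs = tokenizer("Summarize: the quick brown fox", return_tensors="pt")
outputs = model.generate(**inputs, max_length=50, do_sample=True)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```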
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251209_011341/checkpoints/epoch_12/generation_config.json
ADDED
@@ -0,0 +1,6 @@
+{
+  "_from_model_config": true,
+  "bos_token_id": 50256,
+  "eos_token_id": 50256,
+  "transformers_version": "4.56.0"
+}
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251209_011341/checkpoints/epoch_12/lr_scheduler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1cfac693e0b5af466e48332e041e057ee27abaf9cab8eb3a91852f357a818c75
+size 1483
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251209_011341/checkpoints/epoch_12/merges.txt
ADDED
The diff for this file is too large to render. See raw diff.
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251209_011341/checkpoints/epoch_12/model.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9be8d6d89ed7b0ed28ec0e4e6bcfcf5a45e352ca1197c30feb3d0722fc2b146c
+size 497774208
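The binary files in this commit are stored as Git LFS pointers: the repo tracks only a sha256 oid and a byte size, in the git-lfs v1 pointer format shown above. git-lfs verifies the oid itself when it resolves pointers, but if you fetch a raw pointer and blob separately you can replicate the check; a sketch (the helper name and file paths are hypothetical):

```python
import hashlib

def matches_lfs_pointer(pointer_path: str, blob_path: str) -> bool:
    # Parse "oid sha256:<hex>" and "size <bytes>" from a git-lfs v1 pointer
    # file, then hash the downloaded blob and compare both fields.
    fields = dict(line.split(" ", 1) for line in open(pointer_path) if " " in line)
    expected_oid = fields["oid"].strip().removeprefix("sha256:")
    expected_size = int(fields["size"])

    digest, size = hashlib.sha256(), 0
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
            size += len(chunk)
    return digest.hexdigest() == expected_oid and size == expected_size

# e.g. matches_lfs_pointer("model.safetensors.pointer", "model.safetensors")
```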
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251209_011341/checkpoints/epoch_12/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fbfa3917cd670378b48ea1dba7de94800f133d7cf77da4f8f1ab161473bca23b
+size 1171926795
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251209_011341/checkpoints/epoch_12/projector_0.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bc955644541767ab0d806b111eafa4081220da86e680f16971055f6fa5f8840c
+size 6296517
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251209_011341/checkpoints/epoch_12/projector_1.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d63e158e13360f720ef3daf6dd2897fcef7b537cbc397b7b395583a4ae1ca762
+size 6296517
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251209_011341/checkpoints/epoch_12/projector_2.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3957878aee0750d99156b29e6563bb9dcaca227d9a7d87b66731ea9ac8cc0c85
+size 6296517
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251209_011341/checkpoints/epoch_12/projector_3.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:544603d06e4a9b3fdddddf8e6ec560376809fbfdb248f5b659afe9accf9641a6
+size 6296517
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251209_011341/checkpoints/epoch_12/projector_4.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e297cbbbb65e8f7a0d6b079feede5d4edf9fcd1f01579cb3d02ac085ccc9127e
+size 6296517
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251209_011341/checkpoints/epoch_12/projector_5.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:84819519aded0aecf8ae78f55e84d1607b866e93dcd4e149083bbc9e8b6d4fb2
+size 6296517
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251209_011341/checkpoints/epoch_12/projector_6.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:42377842e17e1447dd50808ef2b53585786e6a417d5a179d763604a3ffd20b00
+size 6296517
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251209_011341/checkpoints/epoch_12/projector_7.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:046c0e63bf3878e006cd23b3bd90b373ff83a0403848ee0e30b273258215436b
+size 6296517
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251209_011341/checkpoints/epoch_12/query_projector.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:42df0c0ae02b8e1d938f1afc4fc610130bb6acc5dee1ec4fded18b63359532d2
+size 25184229
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251209_011341/checkpoints/epoch_12/s2t_projector.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf34a7e46d3b54ac2932bcce97d254d1748b8658c53dd15e01bb3513f38343c0
+size 6301653
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251209_011341/checkpoints/epoch_12/special_tokens_map.json
ADDED
@@ -0,0 +1,6 @@
+{
+  "bos_token": "<|endoftext|>",
+  "eos_token": "<|endoftext|>",
+  "pad_token": "<|endoftext|>",
+  "unk_token": "<|endoftext|>"
+}
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251209_011341/checkpoints/epoch_12/t2s_projector.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:59a3a99a2aefb864dc3a4e32f3ee301be4fa65548b774d45acd09f43dac410f7
+size 6296533
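Alongside the student weights, the checkpoint carries eight per-layer projector_*.pt files (~6.3 MB each) plus query, s2t, and t2s projectors (presumably student-to-teacher and teacher-to-student mappings for the distillation setup); transformers ignores these, so the training code must restore them itself. A sketch for inspecting what each file contains, assuming plain torch-serialized state dicts (the actual payload may differ):

```python
import torch

ckpt_dir = "experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251209_011341/checkpoints/epoch_12"
names = [f"projector_{i}" for i in range(8)] + ["query_projector", "s2t_projector", "t2s_projector"]

for name in names:
    state = torch.load(f"{ckpt_dir}/{name}.pt", map_location="cpu")
    # If the file holds a state_dict, print each parameter's shape.
    if isinstance(state, dict):
        for key, value in state.items():
            if torch.is_tensor(value):
                print(name, key, tuple(value.shape))
```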
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251209_011341/checkpoints/epoch_12/tokenizer.json
ADDED
The diff for this file is too large to render. See raw diff.
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251209_011341/checkpoints/epoch_12/tokenizer_config.json
ADDED
@@ -0,0 +1,21 @@
+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "50256": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|endoftext|>",
+  "extra_special_tokens": {},
+  "model_max_length": 1024,
+  "pad_token": "<|endoftext|>",
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<|endoftext|>"
+}
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251209_011341/checkpoints/epoch_12/vocab.json
ADDED
The diff for this file is too large to render. See raw diff.
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251209_011341/tuandao_qwen1.5-1.8b_to_gpt2-120m.log
CHANGED
@@ -645,3 +645,55 @@
 2025-12-09 06:01:21,749 - absl - INFO - Using default tokenizer.
 2025-12-09 06:01:24,709 - root - INFO - Epoch 12/20 eval rougeL: 0.21586395602741776
 2025-12-09 06:01:26,943 - root - INFO - Epoch 13/20
+2025-12-09 06:03:23,167 - absl - INFO - Using default tokenizer.
+2025-12-09 06:03:26,056 - root - INFO - Step 17281/28600 train rougeL: 0.8877341920374706
+2025-12-09 06:03:26,365 - root - INFO - Step 17281/28600 loss: 1.1440061330795288, nll_loss: 0.12626519799232483, distill_loss: 0.1744195520877838, dskd_loss: 1.8610622882843018
+2025-12-09 06:05:30,510 - absl - INFO - Using default tokenizer.
+2025-12-09 06:05:33,754 - root - INFO - Step 17409/28600 train rougeL: 0.7394414648471688
+2025-12-09 06:05:34,049 - root - INFO - Step 17409/28600 loss: 1.007826328277588, nll_loss: 0.1303185373544693, distill_loss: 0.17187200486660004, dskd_loss: 1.5831435918807983
+2025-12-09 06:07:28,268 - absl - INFO - Using default tokenizer.
+2025-12-09 06:07:29,628 - root - INFO - Step 17537/28600 train rougeL: 0.8919871794871795
+2025-12-09 06:07:29,910 - root - INFO - Step 17537/28600 loss: 0.8760161399841309, nll_loss: 0.09825839102268219, distill_loss: 0.15972398221492767, dskd_loss: 1.3957915306091309
+2025-12-09 06:09:29,699 - absl - INFO - Using default tokenizer.
+2025-12-09 06:09:31,518 - root - INFO - Step 17665/28600 train rougeL: 0.875
+2025-12-09 06:09:31,800 - root - INFO - Step 17665/28600 loss: 0.9362841248512268, nll_loss: 0.09939136356115341, distill_loss: 0.15755969285964966, dskd_loss: 1.516225814819336
+2025-12-09 06:11:35,868 - absl - INFO - Using default tokenizer.
+2025-12-09 06:11:38,413 - root - INFO - Step 17793/28600 train rougeL: 0.8717079732028185
+2025-12-09 06:11:38,716 - root - INFO - Step 17793/28600 loss: 0.9913924932479858, nll_loss: 0.15530525147914886, distill_loss: 0.170277938246727, dskd_loss: 1.5018965005874634
+2025-12-09 06:13:38,174 - absl - INFO - Using default tokenizer.
+2025-12-09 06:13:41,233 - root - INFO - Step 17921/28600 train rougeL: 0.8828334311342108
+2025-12-09 06:13:41,527 - root - INFO - Step 17921/28600 loss: 1.1010695695877075, nll_loss: 0.13439980149269104, distill_loss: 0.2036806046962738, dskd_loss: 1.729658842086792
+2025-12-09 06:15:41,550 - absl - INFO - Using default tokenizer.
+2025-12-09 06:15:43,754 - root - INFO - Step 18049/28600 train rougeL: 0.7698166988391756
+2025-12-09 06:15:44,078 - root - INFO - Step 18049/28600 loss: 1.1418837308883667, nll_loss: 0.16063542664051056, distill_loss: 0.16913819313049316, dskd_loss: 1.7933584451675415
+2025-12-09 06:17:41,141 - absl - INFO - Using default tokenizer.
+2025-12-09 06:17:44,373 - root - INFO - Step 18177/28600 train rougeL: 0.8504827126515868
+2025-12-09 06:17:44,683 - root - INFO - Step 18177/28600 loss: 0.8415998220443726, nll_loss: 0.12076552957296371, distill_loss: 0.1570442020893097, dskd_loss: 1.2846243381500244
+2025-12-09 06:19:39,203 - absl - INFO - Using default tokenizer.
+2025-12-09 06:19:41,200 - root - INFO - Step 18305/28600 train rougeL: 0.9
+2025-12-09 06:19:41,483 - root - INFO - Step 18305/28600 loss: 0.8405715227127075, nll_loss: 0.10600381344556808, distill_loss: 0.15056058764457703, dskd_loss: 1.3185747861862183
+2025-12-09 06:21:38,991 - absl - INFO - Using default tokenizer.
+2025-12-09 06:21:41,863 - root - INFO - Step 18433/28600 train rougeL: 0.7330878138255187
+2025-12-09 06:21:42,176 - root - INFO - Step 18433/28600 loss: 1.2977479696273804, nll_loss: 0.22122560441493988, distill_loss: 0.17732103168964386, dskd_loss: 1.9757237434387207
+2025-12-09 06:23:39,174 - absl - INFO - Using default tokenizer.
+2025-12-09 06:23:41,229 - root - INFO - Step 18561/28600 train rougeL: 0.8313110586464664
+2025-12-09 06:23:41,520 - root - INFO - Step 18561/28600 loss: 1.1198689937591553, nll_loss: 0.19140806794166565, distill_loss: 0.16645745933055878, dskd_loss: 1.6904644966125488
+2025-12-09 06:24:09,095 - root - INFO - Epoch 13/20 finished
+2025-12-09 06:24:09,126 - absl - INFO - Using default tokenizer.
+2025-12-09 06:24:12,406 - absl - INFO - Using default tokenizer.
+2025-12-09 06:24:15,727 - absl - INFO - Using default tokenizer.
+2025-12-09 06:24:18,899 - absl - INFO - Using default tokenizer.
+2025-12-09 06:24:22,027 - absl - INFO - Using default tokenizer.
+2025-12-09 06:24:25,343 - absl - INFO - Using default tokenizer.
+2025-12-09 06:24:28,625 - absl - INFO - Using default tokenizer.
+2025-12-09 06:24:31,908 - absl - INFO - Using default tokenizer.
+2025-12-09 06:24:35,240 - absl - INFO - Using default tokenizer.
+2025-12-09 06:24:38,544 - absl - INFO - Using default tokenizer.
+2025-12-09 06:24:41,807 - absl - INFO - Using default tokenizer.
+2025-12-09 06:24:45,030 - absl - INFO - Using default tokenizer.
+2025-12-09 06:24:48,306 - absl - INFO - Using default tokenizer.
+2025-12-09 06:24:51,557 - absl - INFO - Using default tokenizer.
+2025-12-09 06:24:54,704 - absl - INFO - Using default tokenizer.
+2025-12-09 06:24:57,913 - absl - INFO - Using default tokenizer.
+2025-12-09 06:25:01,092 - root - INFO - Epoch 13/20 eval rougeL: 0.21788657515474516
+2025-12-09 06:25:03,338 - root - INFO - Epoch 14/20
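Each logged training step reports the total loss plus its nll_loss, distill_loss, and dskd_loss components (the mixing weights are internal to the training script and not recoverable from the log alone). A sketch that extracts these series for plotting, matching only the line format visible above (log filename as in this commit):

```python
import re

# Matches the per-step loss lines, e.g.
# "Step 17281/28600 loss: 1.144..., nll_loss: 0.126..., distill_loss: 0.174..., dskd_loss: 1.861..."
PATTERN = re.compile(
    r"Step (\d+)/\d+ loss: ([\d.]+), nll_loss: ([\d.]+), "
    r"distill_loss: ([\d.]+), dskd_loss: ([\d.]+)"
)

rows = []
with open("tuandao_qwen1.5-1.8b_to_gpt2-120m.log") as f:
    for line in f:
        m = PATTERN.search(line)
        if m:
            step, loss, nll, distill, dskd = m.groups()
            rows.append((int(step), float(loss), float(nll), float(distill), float(dskd)))

for step, loss, nll, distill, dskd in rows:
    print(f"{step:>6}  loss={loss:.3f}  nll={nll:.3f}  distill={distill:.3f}  dskd={dskd:.3f}")
```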
experiments/tuandao_qwen1.5-1.8b_to_gpt2-120m/20251209_011341/tuandao_qwen1.5-1.8b_to_gpt2-120m_metrics.jsonl
CHANGED
The diff for this file is too large to render. See raw diff.
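The companion metrics file is JSONL, one JSON record per line; since its diff is not rendered here, the record schema is unknown, so this reader sketch assumes nothing beyond valid JSON per line:

```python
import json

records = []
with open("tuandao_qwen1.5-1.8b_to_gpt2-120m_metrics.jsonl") as f:
    for line in f:
        line = line.strip()
        if line:  # skip blank lines
            records.append(json.loads(line))

print(len(records), "records; keys of first:", sorted(records[0]) if records else None)
```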