Commit b994498
1 parent: bc3b412
update
Browse files

- trained_model_weight/{checkpoint-134368 → checkpoint-150176}/config.json +0 -0
- trained_model_weight/{checkpoint-134368 → checkpoint-150176}/merges.txt +0 -0
- trained_model_weight/{checkpoint-134368 → checkpoint-150176}/model.safetensors +1 -1
- trained_model_weight/{checkpoint-142272 → checkpoint-150176}/optimizer.pt +1 -1
- trained_model_weight/{checkpoint-142272 → checkpoint-150176}/rng_state.pth +1 -1
- trained_model_weight/{checkpoint-142272 → checkpoint-150176}/scaler.pt +1 -1
- trained_model_weight/{checkpoint-134368 → checkpoint-150176}/scheduler.pt +1 -1
- trained_model_weight/{checkpoint-134368 → checkpoint-150176}/special_tokens_map.json +0 -0
- trained_model_weight/{checkpoint-134368 → checkpoint-150176}/tokenizer.json +0 -0
- trained_model_weight/{checkpoint-134368 → checkpoint-150176}/tokenizer_config.json +0 -0
- trained_model_weight/{checkpoint-142272 → checkpoint-150176}/trainer_state.json +1125 -7
- trained_model_weight/{checkpoint-134368 → checkpoint-150176}/training_args.bin +0 -0
- trained_model_weight/{checkpoint-134368 → checkpoint-150176}/vocab.json +0 -0
- trained_model_weight/{checkpoint-142272 → checkpoint-158080}/config.json +0 -0
- trained_model_weight/{checkpoint-142272 → checkpoint-158080}/merges.txt +0 -0
- trained_model_weight/{checkpoint-142272 → checkpoint-158080}/model.safetensors +1 -1
- trained_model_weight/{checkpoint-134368 → checkpoint-158080}/optimizer.pt +1 -1
- trained_model_weight/{checkpoint-134368 → checkpoint-158080}/rng_state.pth +1 -1
- trained_model_weight/{checkpoint-134368 → checkpoint-158080}/scaler.pt +1 -1
- trained_model_weight/{checkpoint-142272 → checkpoint-158080}/scheduler.pt +1 -1
- trained_model_weight/{checkpoint-142272 → checkpoint-158080}/special_tokens_map.json +0 -0
- trained_model_weight/{checkpoint-142272 → checkpoint-158080}/tokenizer.json +0 -0
- trained_model_weight/{checkpoint-142272 → checkpoint-158080}/tokenizer_config.json +0 -0
- trained_model_weight/{checkpoint-134368 → checkpoint-158080}/trainer_state.json +3361 -7
- trained_model_weight/{checkpoint-142272 → checkpoint-158080}/training_args.bin +0 -0
- trained_model_weight/{checkpoint-142272 → checkpoint-158080}/vocab.json +0 -0
- trained_model_weight/logs/events.out.tfevents.1766505359.38224b27a3cf.4653.0 +2 -2
trained_model_weight/{checkpoint-134368 → checkpoint-150176}/config.json
RENAMED
File without changes

trained_model_weight/{checkpoint-134368 → checkpoint-150176}/merges.txt
RENAMED
File without changes
trained_model_weight/{checkpoint-134368 → checkpoint-150176}/model.safetensors
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:d1e0d6c1484354682b0783da2bb74c3fb9e02de9c0a225a0b42bd4b8d52202c4
 size 498615900
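All of the large checkpoint files in this commit are stored as Git LFS pointer files; each diff above only swaps the `oid sha256:` line while the byte size stays the same. A minimal sketch (Python, illustrative local path) for checking that a downloaded object actually matches the digest recorded in its pointer:

import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream the file and return its hex SHA-256 digest."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# Digest taken from the pointer diff for checkpoint-150176/model.safetensors above.
expected = "d1e0d6c1484354682b0783da2bb74c3fb9e02de9c0a225a0b42bd4b8d52202c4"
actual = sha256_of("trained_model_weight/checkpoint-150176/model.safetensors")
print("ok" if actual == expected else f"mismatch: {actual}")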
trained_model_weight/{checkpoint-142272 → checkpoint-150176}/optimizer.pt
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:8495891819f10dcc6c163229d3618f5fefa031cce39d822887443457c6979853
 size 997354891
trained_model_weight/{checkpoint-142272 → checkpoint-150176}/rng_state.pth
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:8498468df5a6cb0af3ae683e4bceac01be967fc4d438bfbc9c80a86ac6608fe4
 size 14645
trained_model_weight/{checkpoint-142272 → checkpoint-150176}/scaler.pt
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:875f7587c894e95cfb903e37808d27dc1353dac308319bcd2d6a9b9e018c7fd5
 size 1383
trained_model_weight/{checkpoint-134368 → checkpoint-150176}/scheduler.pt
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:250b07a66094ddf483c72d9d5226472e2beb65fe5bc50561ddfe1f41d7a5b261
 size 1465
trained_model_weight/{checkpoint-134368 → checkpoint-150176}/special_tokens_map.json
RENAMED
File without changes

trained_model_weight/{checkpoint-134368 → checkpoint-150176}/tokenizer.json
RENAMED
File without changes

trained_model_weight/{checkpoint-134368 → checkpoint-150176}/tokenizer_config.json
RENAMED
File without changes
trained_model_weight/{checkpoint-142272 → checkpoint-150176}/trainer_state.json
RENAMED
@@ -1,10 +1,10 @@
 {
-    "best_global_step":
-    "best_metric": 0.
-    "best_model_checkpoint": "model/3_trained_model/checkpoint-
-    "epoch":
+    "best_global_step": 150176,
+    "best_metric": 0.9650628878310451,
+    "best_model_checkpoint": "model/3_trained_model/checkpoint-150176",
+    "epoch": 19.0,
     "eval_steps": 500,
-    "global_step":
+    "global_step": 150176,
     "is_hyper_param_search": false,
     "is_local_process_zero": true,
     "is_world_process_zero": true,
@@ -20139,6 +20139,1124 @@
       "eval_samples_per_second": 640.832,
       "eval_steps_per_second": 5.007,
       "step": 142272
+    },
+    {
+      "epoch": 18.00354251012146,
+      "grad_norm": 0.07442587614059448,
+      "learning_rate": 4.903072054844188e-05,
+      "loss": 0.0361,
+      "step": 142300
+    },
+    {
+      "epoch": 18.00986842105263,
+      "grad_norm": 0.5681266188621521,
+      "learning_rate": 4.9029197701744056e-05,
+      "loss": 0.033,
+      "step": 142350
+    },
+    [... logging entries every 50 steps from step 142400 through step 150100 ...]
+    {
+      "epoch": 18.99671052631579,
+      "grad_norm": 9.871822357177734,
+      "learning_rate": 4.8777333922733334e-05,
+      "loss": 0.0415,
+      "step": 150150
+    },
+    {
+      "epoch": 19.0,
+      "eval_accuracy": 0.9647017421768116,
+      "eval_f1": 0.9650628878310451,
+      "eval_loss": 0.14777891337871552,
+      "eval_precision": 0.9651038822009902,
+      "eval_recall": 0.9651774362311173,
+      "eval_runtime": 395.6448,
+      "eval_samples_per_second": 639.215,
+      "eval_steps_per_second": 4.994,
+      "step": 150176
     }
   ],
   "logging_steps": 50,
@@ -20153,7 +21271,7 @@
       "early_stopping_threshold": 0.0
     },
     "attributes": {
-      "early_stopping_patience_counter":
+      "early_stopping_patience_counter": 0
     }
   },
   "TrainerControl": {
@@ -20167,7 +21285,7 @@
     "attributes": {}
     }
   },
-  "total_flos": 4.
+  "total_flos": 4.844708940333953e+18,
   "train_batch_size": 128,
   "trial_name": null,
   "trial_params": null
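The updated trainer_state.json records the new best checkpoint and the epoch-19 evaluation metrics shown in the diff above. A minimal sketch for reading those values back, assuming the usual Hugging Face Trainer layout where the per-step entries live under the "log_history" key (the path is illustrative):

import json

with open("trained_model_weight/checkpoint-150176/trainer_state.json") as f:
    state = json.load(f)

# Best-checkpoint bookkeeping written by the Trainer.
print(state["best_metric"], state["best_model_checkpoint"])

# Most recent entry that carries eval metrics (eval_f1, eval_accuracy, ...).
evals = [e for e in state.get("log_history", []) if "eval_f1" in e]
if evals:
    last = evals[-1]
    print(last["epoch"], last["eval_f1"], last["eval_accuracy"])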
trained_model_weight/{checkpoint-134368 → checkpoint-150176}/training_args.bin
RENAMED
File without changes

trained_model_weight/{checkpoint-134368 → checkpoint-150176}/vocab.json
RENAMED
File without changes
trained_model_weight/{checkpoint-142272 → checkpoint-158080}/config.json
RENAMED
File without changes

trained_model_weight/{checkpoint-142272 → checkpoint-158080}/merges.txt
RENAMED
File without changes
trained_model_weight/{checkpoint-142272 → checkpoint-158080}/model.safetensors
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:28d06a9da528573f3772d5f7ac75984b3c87e7f7b592e588e21285999bf9ae28
 size 498615900
trained_model_weight/{checkpoint-134368 → checkpoint-158080}/optimizer.pt
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:c2a6888e939fc6950f402853d6240fa2796a4dc6408f91ac63c9a5a7174a925e
 size 997354891
trained_model_weight/{checkpoint-134368 → checkpoint-158080}/rng_state.pth
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:c6fb5d288511442e95a33f40062bc5b91d7dc8a7586717333e643ac6237f8800
 size 14645
trained_model_weight/{checkpoint-134368 → checkpoint-158080}/scaler.pt
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:a646bf66717afac82ce8d913739da701cd4bfd4b968cf6c76cf3b2666846e81b
 size 1383
trained_model_weight/{checkpoint-142272 → checkpoint-158080}/scheduler.pt
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:41886086ce5a07fc93222981b95a92a4f6ba75eeb558f670f7a7fd6ce7c8a01a
 size 1465
trained_model_weight/{checkpoint-142272 → checkpoint-158080}/special_tokens_map.json
RENAMED
File without changes

trained_model_weight/{checkpoint-142272 → checkpoint-158080}/tokenizer.json
RENAMED
File without changes

trained_model_weight/{checkpoint-142272 → checkpoint-158080}/tokenizer_config.json
RENAMED
File without changes
trained_model_weight/{checkpoint-134368 β checkpoint-158080}/trainer_state.json
RENAMED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"best_global_step":
|
| 3 |
-
"best_metric": 0.
|
| 4 |
-
"best_model_checkpoint": "model/3_trained_model/checkpoint-
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -19021,6 +19021,3360 @@
|
|
| 19021 |
"eval_samples_per_second": 640.219,
|
| 19022 |
"eval_steps_per_second": 5.002,
|
| 19023 |
"step": 134368
|
| 19024 |
}
|
| 19025 |
],
|
| 19026 |
"logging_steps": 50,
|
|
@@ -19035,7 +22389,7 @@
|
|
| 19035 |
"early_stopping_threshold": 0.0
|
| 19036 |
},
|
| 19037 |
"attributes": {
|
| 19038 |
-
"early_stopping_patience_counter":
|
| 19039 |
}
|
| 19040 |
},
|
| 19041 |
"TrainerControl": {
|
|
@@ -19049,7 +22403,7 @@
|
|
| 19049 |
"attributes": {}
|
| 19050 |
}
|
| 19051 |
},
|
| 19052 |
-
"total_flos":
|
| 19053 |
"train_batch_size": 128,
|
| 19054 |
"trial_name": null,
|
| 19055 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_global_step": 150176,
|
| 3 |
+
"best_metric": 0.9650628878310451,
|
| 4 |
+
"best_model_checkpoint": "model/3_trained_model/checkpoint-150176",
|
| 5 |
+
"epoch": 20.0,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 158080,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
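The ten header lines above are the updated summary that the Hugging Face Trainer writes at the top of trainer_state.json: after this commit the state records epoch 20.0, global_step 158080, and checkpoint-150176 as the best checkpoint so far (best_metric ≈ 0.96506). A minimal sketch of reading those fields back in Python follows; the file path is an illustrative assumption, not a path guaranteed to exist once the repository is downloaded.

import json

def best_checkpoint_info(trainer_state_path):
    # trainer_state.json is plain JSON; the top-level keys shown in this diff
    # summarize training progress and the best checkpoint seen so far.
    with open(trainer_state_path, "r", encoding="utf-8") as f:
        state = json.load(f)
    keys = ("best_global_step", "best_metric", "best_model_checkpoint", "epoch", "global_step")
    return {k: state.get(k) for k in keys}

# Hypothetical local path -- adjust to wherever the checkpoint directory lives.
print(best_checkpoint_info("trained_model_weight/checkpoint-158080/trainer_state.json"))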
|
|
| 19021 |
"eval_samples_per_second": 640.219,
|
| 19022 |
"eval_steps_per_second": 5.002,
|
| 19023 |
"step": 134368
|
| 19024 |
+
},
|
| 19025 |
+
{
|
| 19026 |
+
"epoch": 17.004048582995953,
|
| 19027 |
+
"grad_norm": 0.9865813851356506,
|
| 19028 |
+
"learning_rate": 4.9256564916274984e-05,
|
| 19029 |
+
"loss": 0.035,
|
| 19030 |
+
"step": 134400
|
| 19031 |
+
},
|
| 19032 |
+
{
|
| 19033 |
+
"epoch": 17.010374493927124,
|
| 19034 |
+
"grad_norm": 4.903671741485596,
|
| 19035 |
+
"learning_rate": 4.925522808628461e-05,
|
| 19036 |
+
"loss": 0.0365,
|
| 19037 |
+
"step": 134450
|
| 19038 |
+
},
|
| 19039 |
+
{
|
| 19040 |
+
"epoch": 17.016700404858298,
|
| 19041 |
+
"grad_norm": 3.3794944286346436,
|
| 19042 |
+
"learning_rate": 4.925389007361703e-05,
|
| 19043 |
+
"loss": 0.0368,
|
| 19044 |
+
"step": 134500
|
| 19045 |
+
},
|
| 19046 |
+
{
|
| 19047 |
+
"epoch": 17.023026315789473,
|
| 19048 |
+
"grad_norm": 4.283618450164795,
|
| 19049 |
+
"learning_rate": 4.925255087833744e-05,
|
| 19050 |
+
"loss": 0.0361,
|
| 19051 |
+
"step": 134550
|
| 19052 |
+
},
|
| 19053 |
+
{
|
| 19054 |
+
"epoch": 17.029352226720647,
|
| 19055 |
+
"grad_norm": 2.782749652862549,
|
| 19056 |
+
"learning_rate": 4.9251210500511194e-05,
|
| 19057 |
+
"loss": 0.0357,
|
| 19058 |
+
"step": 134600
|
| 19059 |
+
},
|
| 19060 |
+
{
|
| 19061 |
+
"epoch": 17.03567813765182,
|
| 19062 |
+
"grad_norm": 0.3900313973426819,
|
| 19063 |
+
"learning_rate": 4.92498689402036e-05,
|
| 19064 |
+
"loss": 0.0352,
|
| 19065 |
+
"step": 134650
|
| 19066 |
+
},
|
| 19067 |
+
{
|
| 19068 |
+
"epoch": 17.042004048582996,
|
| 19069 |
+
"grad_norm": 4.356444358825684,
|
| 19070 |
+
"learning_rate": 4.9248526197480106e-05,
|
| 19071 |
+
"loss": 0.0392,
|
| 19072 |
+
"step": 134700
|
| 19073 |
+
},
|
| 19074 |
+
{
|
| 19075 |
+
"epoch": 17.04832995951417,
|
| 19076 |
+
"grad_norm": 7.123837471008301,
|
| 19077 |
+
"learning_rate": 4.9247182272406164e-05,
|
| 19078 |
+
"loss": 0.0403,
|
| 19079 |
+
"step": 134750
|
| 19080 |
+
},
|
| 19081 |
+
{
|
| 19082 |
+
"epoch": 17.054655870445345,
|
| 19083 |
+
"grad_norm": 1.2981936931610107,
|
| 19084 |
+
"learning_rate": 4.9245837165047314e-05,
|
| 19085 |
+
"loss": 0.0338,
|
| 19086 |
+
"step": 134800
|
| 19087 |
+
},
|
| 19088 |
+
{
|
| 19089 |
+
"epoch": 17.06098178137652,
|
| 19090 |
+
"grad_norm": 3.5616233348846436,
|
| 19091 |
+
"learning_rate": 4.924449087546914e-05,
|
| 19092 |
+
"loss": 0.0347,
|
| 19093 |
+
"step": 134850
|
| 19094 |
+
},
|
| 19095 |
+
{
|
| 19096 |
+
"epoch": 17.067307692307693,
|
| 19097 |
+
"grad_norm": 1.115071177482605,
|
| 19098 |
+
"learning_rate": 4.924314340373728e-05,
|
| 19099 |
+
"loss": 0.0431,
|
| 19100 |
+
"step": 134900
|
| 19101 |
+
},
|
| 19102 |
+
{
|
| 19103 |
+
"epoch": 17.073633603238868,
|
| 19104 |
+
"grad_norm": 1.1111431121826172,
|
| 19105 |
+
"learning_rate": 4.924179474991744e-05,
|
| 19106 |
+
"loss": 0.0348,
|
| 19107 |
+
"step": 134950
|
| 19108 |
+
},
|
| 19109 |
+
{
|
| 19110 |
+
"epoch": 17.079959514170042,
|
| 19111 |
+
"grad_norm": 3.354928493499756,
|
| 19112 |
+
"learning_rate": 4.9240444914075386e-05,
|
| 19113 |
+
"loss": 0.0361,
|
| 19114 |
+
"step": 135000
|
| 19115 |
+
},
|
| 19116 |
+
{
|
| 19117 |
+
"epoch": 17.086285425101213,
|
| 19118 |
+
"grad_norm": 9.625130653381348,
|
| 19119 |
+
"learning_rate": 4.923909389627693e-05,
|
| 19120 |
+
"loss": 0.0383,
|
| 19121 |
+
"step": 135050
|
| 19122 |
+
},
|
| 19123 |
+
{
|
| 19124 |
+
"epoch": 17.092611336032387,
|
| 19125 |
+
"grad_norm": 0.7868618369102478,
|
| 19126 |
+
"learning_rate": 4.9237741696587946e-05,
|
| 19127 |
+
"loss": 0.0372,
|
| 19128 |
+
"step": 135100
|
| 19129 |
+
},
|
| 19130 |
+
{
|
| 19131 |
+
"epoch": 17.098937246963562,
|
| 19132 |
+
"grad_norm": 1.425545573234558,
|
| 19133 |
+
"learning_rate": 4.9236388315074365e-05,
|
| 19134 |
+
"loss": 0.037,
|
| 19135 |
+
"step": 135150
|
| 19136 |
+
},
|
| 19137 |
+
{
|
| 19138 |
+
"epoch": 17.105263157894736,
|
| 19139 |
+
"grad_norm": 6.391676425933838,
|
| 19140 |
+
"learning_rate": 4.923503375180219e-05,
|
| 19141 |
+
"loss": 0.0395,
|
| 19142 |
+
"step": 135200
|
| 19143 |
+
},
|
| 19144 |
+
{
|
| 19145 |
+
"epoch": 17.11158906882591,
|
| 19146 |
+
"grad_norm": 3.261273145675659,
|
| 19147 |
+
"learning_rate": 4.9233678006837456e-05,
|
| 19148 |
+
"loss": 0.0337,
|
| 19149 |
+
"step": 135250
|
| 19150 |
+
},
|
| 19151 |
+
{
|
| 19152 |
+
"epoch": 17.117914979757085,
|
| 19153 |
+
"grad_norm": 6.938215732574463,
|
| 19154 |
+
"learning_rate": 4.923232108024628e-05,
|
| 19155 |
+
"loss": 0.0412,
|
| 19156 |
+
"step": 135300
|
| 19157 |
+
},
|
| 19158 |
+
{
|
| 19159 |
+
"epoch": 17.12424089068826,
|
| 19160 |
+
"grad_norm": 2.808600425720215,
|
| 19161 |
+
"learning_rate": 4.923096297209481e-05,
|
| 19162 |
+
"loss": 0.0359,
|
| 19163 |
+
"step": 135350
|
| 19164 |
+
},
|
| 19165 |
+
{
|
| 19166 |
+
"epoch": 17.130566801619434,
|
| 19167 |
+
"grad_norm": 7.682435035705566,
|
| 19168 |
+
"learning_rate": 4.922960368244928e-05,
|
| 19169 |
+
"loss": 0.0353,
|
| 19170 |
+
"step": 135400
|
| 19171 |
+
},
|
| 19172 |
+
{
|
| 19173 |
+
"epoch": 17.13689271255061,
|
| 19174 |
+
"grad_norm": 7.403769493103027,
|
| 19175 |
+
"learning_rate": 4.9228243211375956e-05,
|
| 19176 |
+
"loss": 0.0369,
|
| 19177 |
+
"step": 135450
|
| 19178 |
+
},
|
| 19179 |
+
{
|
| 19180 |
+
"epoch": 17.143218623481783,
|
| 19181 |
+
"grad_norm": 0.48791977763175964,
|
| 19182 |
+
"learning_rate": 4.9226881558941195e-05,
|
| 19183 |
+
"loss": 0.0356,
|
| 19184 |
+
"step": 135500
|
| 19185 |
+
},
|
| 19186 |
+
{
|
| 19187 |
+
"epoch": 17.149544534412957,
|
| 19188 |
+
"grad_norm": 11.574020385742188,
|
| 19189 |
+
"learning_rate": 4.9225518725211375e-05,
|
| 19190 |
+
"loss": 0.0489,
|
| 19191 |
+
"step": 135550
|
| 19192 |
+
},
|
| 19193 |
+
{
|
| 19194 |
+
"epoch": 17.155870445344128,
|
| 19195 |
+
"grad_norm": 2.202258586883545,
|
| 19196 |
+
"learning_rate": 4.9224154710252945e-05,
|
| 19197 |
+
"loss": 0.0333,
|
| 19198 |
+
"step": 135600
|
| 19199 |
+
},
|
| 19200 |
+
{
|
| 19201 |
+
"epoch": 17.162196356275302,
|
| 19202 |
+
"grad_norm": 5.60129976272583,
|
| 19203 |
+
"learning_rate": 4.922278951413242e-05,
|
| 19204 |
+
"loss": 0.0412,
|
| 19205 |
+
"step": 135650
|
| 19206 |
+
},
|
| 19207 |
+
{
|
| 19208 |
+
"epoch": 17.168522267206477,
|
| 19209 |
+
"grad_norm": 3.634636878967285,
|
| 19210 |
+
"learning_rate": 4.922142313691637e-05,
|
| 19211 |
+
"loss": 0.0319,
|
| 19212 |
+
"step": 135700
|
| 19213 |
+
},
|
| 19214 |
+
{
|
| 19215 |
+
"epoch": 17.17484817813765,
|
| 19216 |
+
"grad_norm": 5.244688987731934,
|
| 19217 |
+
"learning_rate": 4.922005557867141e-05,
|
| 19218 |
+
"loss": 0.0409,
|
| 19219 |
+
"step": 135750
|
| 19220 |
+
},
|
| 19221 |
+
{
|
| 19222 |
+
"epoch": 17.181174089068826,
|
| 19223 |
+
"grad_norm": 0.8519790768623352,
|
| 19224 |
+
"learning_rate": 4.9218686839464225e-05,
|
| 19225 |
+
"loss": 0.0404,
|
| 19226 |
+
"step": 135800
|
| 19227 |
+
},
|
| 19228 |
+
{
|
| 19229 |
+
"epoch": 17.1875,
|
| 19230 |
+
"grad_norm": 5.076608657836914,
|
| 19231 |
+
"learning_rate": 4.9217316919361556e-05,
|
| 19232 |
+
"loss": 0.0399,
|
| 19233 |
+
"step": 135850
|
| 19234 |
+
},
|
| 19235 |
+
{
|
| 19236 |
+
"epoch": 17.193825910931174,
|
| 19237 |
+
"grad_norm": 11.121240615844727,
|
| 19238 |
+
"learning_rate": 4.9215945818430204e-05,
|
| 19239 |
+
"loss": 0.0332,
|
| 19240 |
+
"step": 135900
|
| 19241 |
+
},
|
| 19242 |
+
{
|
| 19243 |
+
"epoch": 17.20015182186235,
|
| 19244 |
+
"grad_norm": 4.196413040161133,
|
| 19245 |
+
"learning_rate": 4.921457353673702e-05,
|
| 19246 |
+
"loss": 0.0436,
|
| 19247 |
+
"step": 135950
|
| 19248 |
+
},
|
| 19249 |
+
{
|
| 19250 |
+
"epoch": 17.206477732793523,
|
| 19251 |
+
"grad_norm": 2.676987409591675,
|
| 19252 |
+
"learning_rate": 4.921320007434891e-05,
|
| 19253 |
+
"loss": 0.0377,
|
| 19254 |
+
"step": 136000
|
| 19255 |
+
},
|
| 19256 |
+
{
|
| 19257 |
+
"epoch": 17.212803643724698,
|
| 19258 |
+
"grad_norm": 1.6354352235794067,
|
| 19259 |
+
"learning_rate": 4.921182543133285e-05,
|
| 19260 |
+
"loss": 0.047,
|
| 19261 |
+
"step": 136050
|
| 19262 |
+
},
|
| 19263 |
+
{
|
| 19264 |
+
"epoch": 17.219129554655872,
|
| 19265 |
+
"grad_norm": 7.44692850112915,
|
| 19266 |
+
"learning_rate": 4.921044960775587e-05,
|
| 19267 |
+
"loss": 0.0392,
|
| 19268 |
+
"step": 136100
|
| 19269 |
+
},
|
| 19270 |
+
{
|
| 19271 |
+
"epoch": 17.225455465587043,
|
| 19272 |
+
"grad_norm": 5.149559497833252,
|
| 19273 |
+
"learning_rate": 4.920907260368505e-05,
|
| 19274 |
+
"loss": 0.0337,
|
| 19275 |
+
"step": 136150
|
| 19276 |
+
},
|
| 19277 |
+
{
|
| 19278 |
+
"epoch": 17.231781376518217,
|
| 19279 |
+
"grad_norm": 1.6155211925506592,
|
| 19280 |
+
"learning_rate": 4.9207694419187536e-05,
|
| 19281 |
+
"loss": 0.0433,
|
| 19282 |
+
"step": 136200
|
| 19283 |
+
},
|
| 19284 |
+
{
|
| 19285 |
+
"epoch": 17.23810728744939,
|
| 19286 |
+
"grad_norm": 3.2345328330993652,
|
| 19287 |
+
"learning_rate": 4.9206315054330514e-05,
|
| 19288 |
+
"loss": 0.0415,
|
| 19289 |
+
"step": 136250
|
| 19290 |
+
},
|
| 19291 |
+
{
|
| 19292 |
+
"epoch": 17.244433198380566,
|
| 19293 |
+
"grad_norm": 4.9230546951293945,
|
| 19294 |
+
"learning_rate": 4.920493450918125e-05,
|
| 19295 |
+
"loss": 0.0383,
|
| 19296 |
+
"step": 136300
|
| 19297 |
+
},
|
| 19298 |
+
{
|
| 19299 |
+
"epoch": 17.25075910931174,
|
| 19300 |
+
"grad_norm": 0.3230721056461334,
|
| 19301 |
+
"learning_rate": 4.920355278380708e-05,
|
| 19302 |
+
"loss": 0.0364,
|
| 19303 |
+
"step": 136350
|
| 19304 |
+
},
|
| 19305 |
+
{
|
| 19306 |
+
"epoch": 17.257085020242915,
|
| 19307 |
+
"grad_norm": 3.7151944637298584,
|
| 19308 |
+
"learning_rate": 4.920216987827534e-05,
|
| 19309 |
+
"loss": 0.0308,
|
| 19310 |
+
"step": 136400
|
| 19311 |
+
},
|
| 19312 |
+
{
|
| 19313 |
+
"epoch": 17.26341093117409,
|
| 19314 |
+
"grad_norm": 0.8976667523384094,
|
| 19315 |
+
"learning_rate": 4.9200785792653483e-05,
|
| 19316 |
+
"loss": 0.0418,
|
| 19317 |
+
"step": 136450
|
| 19318 |
+
},
|
| 19319 |
+
{
|
| 19320 |
+
"epoch": 17.269736842105264,
|
| 19321 |
+
"grad_norm": 5.261508464813232,
|
| 19322 |
+
"learning_rate": 4.9199400527009e-05,
|
| 19323 |
+
"loss": 0.033,
|
| 19324 |
+
"step": 136500
|
| 19325 |
+
},
|
| 19326 |
+
{
|
| 19327 |
+
"epoch": 17.276062753036438,
|
| 19328 |
+
"grad_norm": 9.30493450164795,
|
| 19329 |
+
"learning_rate": 4.919801408140942e-05,
|
| 19330 |
+
"loss": 0.035,
|
| 19331 |
+
"step": 136550
|
| 19332 |
+
},
|
| 19333 |
+
{
|
| 19334 |
+
"epoch": 17.282388663967613,
|
| 19335 |
+
"grad_norm": 5.601887226104736,
|
| 19336 |
+
"learning_rate": 4.919662645592235e-05,
|
| 19337 |
+
"loss": 0.0447,
|
| 19338 |
+
"step": 136600
|
| 19339 |
+
},
|
| 19340 |
+
{
|
| 19341 |
+
"epoch": 17.288714574898787,
|
| 19342 |
+
"grad_norm": 2.428513288497925,
|
| 19343 |
+
"learning_rate": 4.9195237650615454e-05,
|
| 19344 |
+
"loss": 0.0376,
|
| 19345 |
+
"step": 136650
|
| 19346 |
+
},
|
| 19347 |
+
{
|
| 19348 |
+
"epoch": 17.295040485829958,
|
| 19349 |
+
"grad_norm": 0.5573081374168396,
|
| 19350 |
+
"learning_rate": 4.919384766555645e-05,
|
| 19351 |
+
"loss": 0.0416,
|
| 19352 |
+
"step": 136700
|
| 19353 |
+
},
|
| 19354 |
+
{
|
| 19355 |
+
"epoch": 17.301366396761132,
|
| 19356 |
+
"grad_norm": 3.8735458850860596,
|
| 19357 |
+
"learning_rate": 4.919245650081311e-05,
|
| 19358 |
+
"loss": 0.0357,
|
| 19359 |
+
"step": 136750
|
| 19360 |
+
},
|
| 19361 |
+
{
|
| 19362 |
+
"epoch": 17.307692307692307,
|
| 19363 |
+
"grad_norm": 5.203364372253418,
|
| 19364 |
+
"learning_rate": 4.919106415645327e-05,
|
| 19365 |
+
"loss": 0.0437,
|
| 19366 |
+
"step": 136800
|
| 19367 |
+
},
|
| 19368 |
+
{
|
| 19369 |
+
"epoch": 17.31401821862348,
|
| 19370 |
+
"grad_norm": 3.0349347591400146,
|
| 19371 |
+
"learning_rate": 4.9189670632544824e-05,
|
| 19372 |
+
"loss": 0.0423,
|
| 19373 |
+
"step": 136850
|
| 19374 |
+
},
|
| 19375 |
+
{
|
| 19376 |
+
"epoch": 17.320344129554655,
|
| 19377 |
+
"grad_norm": 10.724776268005371,
|
| 19378 |
+
"learning_rate": 4.9188275929155715e-05,
|
| 19379 |
+
"loss": 0.0333,
|
| 19380 |
+
"step": 136900
|
| 19381 |
+
},
|
| 19382 |
+
{
|
| 19383 |
+
"epoch": 17.32667004048583,
|
| 19384 |
+
"grad_norm": 3.7561917304992676,
|
| 19385 |
+
"learning_rate": 4.9186880046353946e-05,
|
| 19386 |
+
"loss": 0.0363,
|
| 19387 |
+
"step": 136950
|
| 19388 |
+
},
|
| 19389 |
+
{
|
| 19390 |
+
"epoch": 17.332995951417004,
|
| 19391 |
+
"grad_norm": 3.3792994022369385,
|
| 19392 |
+
"learning_rate": 4.918548298420758e-05,
|
| 19393 |
+
"loss": 0.0434,
|
| 19394 |
+
"step": 137000
|
| 19395 |
+
},
|
| 19396 |
+
{
|
| 19397 |
+
"epoch": 17.33932186234818,
|
| 19398 |
+
"grad_norm": 7.21069860458374,
|
| 19399 |
+
"learning_rate": 4.918408474278474e-05,
|
| 19400 |
+
"loss": 0.0418,
|
| 19401 |
+
"step": 137050
|
| 19402 |
+
},
|
| 19403 |
+
{
|
| 19404 |
+
"epoch": 17.345647773279353,
|
| 19405 |
+
"grad_norm": 5.195310592651367,
|
| 19406 |
+
"learning_rate": 4.91826853221536e-05,
|
| 19407 |
+
"loss": 0.0358,
|
| 19408 |
+
"step": 137100
|
| 19409 |
+
},
|
| 19410 |
+
{
|
| 19411 |
+
"epoch": 17.351973684210527,
|
| 19412 |
+
"grad_norm": 3.407991647720337,
|
| 19413 |
+
"learning_rate": 4.9181284722382406e-05,
|
| 19414 |
+
"loss": 0.0378,
|
| 19415 |
+
"step": 137150
|
| 19416 |
+
},
|
| 19417 |
+
{
|
| 19418 |
+
"epoch": 17.358299595141702,
|
| 19419 |
+
"grad_norm": 6.890528678894043,
|
| 19420 |
+
"learning_rate": 4.917988294353943e-05,
|
| 19421 |
+
"loss": 0.034,
|
| 19422 |
+
"step": 137200
|
| 19423 |
+
},
|
| 19424 |
+
{
|
| 19425 |
+
"epoch": 17.364625506072876,
|
| 19426 |
+
"grad_norm": 3.283585548400879,
|
| 19427 |
+
"learning_rate": 4.9178479985693046e-05,
|
| 19428 |
+
"loss": 0.039,
|
| 19429 |
+
"step": 137250
|
| 19430 |
+
},
|
| 19431 |
+
{
|
| 19432 |
+
"epoch": 17.370951417004047,
|
| 19433 |
+
"grad_norm": 7.473639965057373,
|
| 19434 |
+
"learning_rate": 4.9177075848911645e-05,
|
| 19435 |
+
"loss": 0.0446,
|
| 19436 |
+
"step": 137300
|
| 19437 |
+
},
|
| 19438 |
+
{
|
| 19439 |
+
"epoch": 17.37727732793522,
|
| 19440 |
+
"grad_norm": 0.3352794647216797,
|
| 19441 |
+
"learning_rate": 4.917567053326371e-05,
|
| 19442 |
+
"loss": 0.0373,
|
| 19443 |
+
"step": 137350
|
| 19444 |
+
},
|
| 19445 |
+
{
|
| 19446 |
+
"epoch": 17.383603238866396,
|
| 19447 |
+
"grad_norm": 4.713712692260742,
|
| 19448 |
+
"learning_rate": 4.917426403881774e-05,
|
| 19449 |
+
"loss": 0.0512,
|
| 19450 |
+
"step": 137400
|
| 19451 |
+
},
|
| 19452 |
+
{
|
| 19453 |
+
"epoch": 17.38992914979757,
|
| 19454 |
+
"grad_norm": 5.3047075271606445,
|
| 19455 |
+
"learning_rate": 4.9172856365642335e-05,
|
| 19456 |
+
"loss": 0.0398,
|
| 19457 |
+
"step": 137450
|
| 19458 |
+
},
|
| 19459 |
+
{
|
| 19460 |
+
"epoch": 17.396255060728745,
|
| 19461 |
+
"grad_norm": 2.588514804840088,
|
| 19462 |
+
"learning_rate": 4.9171447513806124e-05,
|
| 19463 |
+
"loss": 0.0326,
|
| 19464 |
+
"step": 137500
|
| 19465 |
+
},
|
| 19466 |
+
{
|
| 19467 |
+
"epoch": 17.40258097165992,
|
| 19468 |
+
"grad_norm": 1.8657335042953491,
|
| 19469 |
+
"learning_rate": 4.9170037483377796e-05,
|
| 19470 |
+
"loss": 0.0427,
|
| 19471 |
+
"step": 137550
|
| 19472 |
+
},
|
| 19473 |
+
{
|
| 19474 |
+
"epoch": 17.408906882591094,
|
| 19475 |
+
"grad_norm": 1.6117347478866577,
|
| 19476 |
+
"learning_rate": 4.916862627442612e-05,
|
| 19477 |
+
"loss": 0.0381,
|
| 19478 |
+
"step": 137600
|
| 19479 |
+
},
|
| 19480 |
+
{
|
| 19481 |
+
"epoch": 17.415232793522268,
|
| 19482 |
+
"grad_norm": 5.138952255249023,
|
| 19483 |
+
"learning_rate": 4.916721388701989e-05,
|
| 19484 |
+
"loss": 0.0427,
|
| 19485 |
+
"step": 137650
|
| 19486 |
+
},
|
| 19487 |
+
{
|
| 19488 |
+
"epoch": 17.421558704453442,
|
| 19489 |
+
"grad_norm": 7.885331153869629,
|
| 19490 |
+
"learning_rate": 4.916580032122799e-05,
|
| 19491 |
+
"loss": 0.0407,
|
| 19492 |
+
"step": 137700
|
| 19493 |
+
},
|
| 19494 |
+
{
|
| 19495 |
+
"epoch": 17.427884615384617,
|
| 19496 |
+
"grad_norm": 1.6614404916763306,
|
| 19497 |
+
"learning_rate": 4.916438557711933e-05,
|
| 19498 |
+
"loss": 0.0461,
|
| 19499 |
+
"step": 137750
|
| 19500 |
+
},
|
| 19501 |
+
{
|
| 19502 |
+
"epoch": 17.43421052631579,
|
| 19503 |
+
"grad_norm": 2.218142509460449,
|
| 19504 |
+
"learning_rate": 4.91629696547629e-05,
|
| 19505 |
+
"loss": 0.0395,
|
| 19506 |
+
"step": 137800
|
| 19507 |
+
},
|
| 19508 |
+
{
|
| 19509 |
+
"epoch": 17.440536437246962,
|
| 19510 |
+
"grad_norm": 0.6837593913078308,
|
| 19511 |
+
"learning_rate": 4.916155255422773e-05,
|
| 19512 |
+
"loss": 0.0372,
|
| 19513 |
+
"step": 137850
|
| 19514 |
+
},
|
| 19515 |
+
{
|
| 19516 |
+
"epoch": 17.446862348178136,
|
| 19517 |
+
"grad_norm": 9.183062553405762,
|
| 19518 |
+
"learning_rate": 4.916013427558294e-05,
|
| 19519 |
+
"loss": 0.0406,
|
| 19520 |
+
"step": 137900
|
| 19521 |
+
},
|
| 19522 |
+
{
|
| 19523 |
+
"epoch": 17.45318825910931,
|
| 19524 |
+
"grad_norm": 5.846785545349121,
|
| 19525 |
+
"learning_rate": 4.9158714818897656e-05,
|
| 19526 |
+
"loss": 0.0445,
|
| 19527 |
+
"step": 137950
|
| 19528 |
+
},
|
| 19529 |
+
{
|
| 19530 |
+
"epoch": 17.459514170040485,
|
| 19531 |
+
"grad_norm": 5.256739139556885,
|
| 19532 |
+
"learning_rate": 4.915729418424111e-05,
|
| 19533 |
+
"loss": 0.045,
|
| 19534 |
+
"step": 138000
|
| 19535 |
+
},
|
| 19536 |
+
{
|
| 19537 |
+
"epoch": 17.46584008097166,
|
| 19538 |
+
"grad_norm": 8.098567008972168,
|
| 19539 |
+
"learning_rate": 4.915587237168257e-05,
|
| 19540 |
+
"loss": 0.0453,
|
| 19541 |
+
"step": 138050
|
| 19542 |
+
},
|
| 19543 |
+
{
|
| 19544 |
+
"epoch": 17.472165991902834,
|
| 19545 |
+
"grad_norm": 6.694127082824707,
|
| 19546 |
+
"learning_rate": 4.915444938129136e-05,
|
| 19547 |
+
"loss": 0.0418,
|
| 19548 |
+
"step": 138100
|
| 19549 |
+
},
|
| 19550 |
+
{
|
| 19551 |
+
"epoch": 17.47849190283401,
|
| 19552 |
+
"grad_norm": 5.5596089363098145,
|
| 19553 |
+
"learning_rate": 4.915302521313686e-05,
|
| 19554 |
+
"loss": 0.0455,
|
| 19555 |
+
"step": 138150
|
| 19556 |
+
},
|
| 19557 |
+
{
|
| 19558 |
+
"epoch": 17.484817813765183,
|
| 19559 |
+
"grad_norm": 2.395667314529419,
|
| 19560 |
+
"learning_rate": 4.915159986728851e-05,
|
| 19561 |
+
"loss": 0.0409,
|
| 19562 |
+
"step": 138200
|
| 19563 |
+
},
|
| 19564 |
+
{
|
| 19565 |
+
"epoch": 17.491143724696357,
|
| 19566 |
+
"grad_norm": 5.9936370849609375,
|
| 19567 |
+
"learning_rate": 4.915017334381582e-05,
|
| 19568 |
+
"loss": 0.0399,
|
| 19569 |
+
"step": 138250
|
| 19570 |
+
},
|
| 19571 |
+
{
|
| 19572 |
+
"epoch": 17.49746963562753,
|
| 19573 |
+
"grad_norm": 4.930709362030029,
|
| 19574 |
+
"learning_rate": 4.914874564278834e-05,
|
| 19575 |
+
"loss": 0.0416,
|
| 19576 |
+
"step": 138300
|
| 19577 |
+
},
|
| 19578 |
+
{
|
| 19579 |
+
"epoch": 17.503795546558706,
|
| 19580 |
+
"grad_norm": 1.366414189338684,
|
| 19581 |
+
"learning_rate": 4.9147316764275694e-05,
|
| 19582 |
+
"loss": 0.041,
|
| 19583 |
+
"step": 138350
|
| 19584 |
+
},
|
| 19585 |
+
{
|
| 19586 |
+
"epoch": 17.510121457489877,
|
| 19587 |
+
"grad_norm": 2.595989942550659,
|
| 19588 |
+
"learning_rate": 4.9145886708347546e-05,
|
| 19589 |
+
"loss": 0.0373,
|
| 19590 |
+
"step": 138400
|
| 19591 |
+
},
|
| 19592 |
+
{
|
| 19593 |
+
"epoch": 17.51644736842105,
|
| 19594 |
+
"grad_norm": 4.697597980499268,
|
| 19595 |
+
"learning_rate": 4.9144455475073614e-05,
|
| 19596 |
+
"loss": 0.0392,
|
| 19597 |
+
"step": 138450
|
| 19598 |
+
},
|
| 19599 |
+
{
|
| 19600 |
+
"epoch": 17.522773279352226,
|
| 19601 |
+
"grad_norm": 5.7686662673950195,
|
| 19602 |
+
"learning_rate": 4.914302306452371e-05,
|
| 19603 |
+
"loss": 0.0427,
|
| 19604 |
+
"step": 138500
|
| 19605 |
+
},
|
| 19606 |
+
{
|
| 19607 |
+
"epoch": 17.5290991902834,
|
| 19608 |
+
"grad_norm": 5.802638053894043,
|
| 19609 |
+
"learning_rate": 4.9141589476767647e-05,
|
| 19610 |
+
"loss": 0.0331,
|
| 19611 |
+
"step": 138550
|
| 19612 |
+
},
|
| 19613 |
+
{
|
| 19614 |
+
"epoch": 17.535425101214575,
|
| 19615 |
+
"grad_norm": 3.5534443855285645,
|
| 19616 |
+
"learning_rate": 4.914015471187535e-05,
|
| 19617 |
+
"loss": 0.0385,
|
| 19618 |
+
"step": 138600
|
| 19619 |
+
},
|
| 19620 |
+
{
|
| 19621 |
+
"epoch": 17.54175101214575,
|
| 19622 |
+
"grad_norm": 2.1027610301971436,
|
| 19623 |
+
"learning_rate": 4.913871876991678e-05,
|
| 19624 |
+
"loss": 0.0395,
|
| 19625 |
+
"step": 138650
|
| 19626 |
+
},
|
| 19627 |
+
{
|
| 19628 |
+
"epoch": 17.548076923076923,
|
| 19629 |
+
"grad_norm": 10.725923538208008,
|
| 19630 |
+
"learning_rate": 4.913728165096193e-05,
|
| 19631 |
+
"loss": 0.038,
|
| 19632 |
+
"step": 138700
|
| 19633 |
+
},
|
| 19634 |
+
{
|
| 19635 |
+
"epoch": 17.554402834008098,
|
| 19636 |
+
"grad_norm": 6.706634044647217,
|
| 19637 |
+
"learning_rate": 4.913584335508089e-05,
|
| 19638 |
+
"loss": 0.0442,
|
| 19639 |
+
"step": 138750
|
| 19640 |
+
},
|
| 19641 |
+
{
|
| 19642 |
+
"epoch": 17.560728744939272,
|
| 19643 |
+
"grad_norm": 3.6727170944213867,
|
| 19644 |
+
"learning_rate": 4.9134403882343796e-05,
|
| 19645 |
+
"loss": 0.0406,
|
| 19646 |
+
"step": 138800
|
| 19647 |
+
},
|
| 19648 |
+
{
|
| 19649 |
+
"epoch": 17.567054655870447,
|
| 19650 |
+
"grad_norm": 4.388698101043701,
|
| 19651 |
+
"learning_rate": 4.913296323282082e-05,
|
| 19652 |
+
"loss": 0.0467,
|
| 19653 |
+
"step": 138850
|
| 19654 |
+
},
|
| 19655 |
+
{
|
| 19656 |
+
"epoch": 17.57338056680162,
|
| 19657 |
+
"grad_norm": 8.199777603149414,
|
| 19658 |
+
"learning_rate": 4.913152140658222e-05,
|
| 19659 |
+
"loss": 0.0409,
|
| 19660 |
+
"step": 138900
|
| 19661 |
+
},
|
| 19662 |
+
{
|
| 19663 |
+
"epoch": 17.579706477732792,
|
| 19664 |
+
"grad_norm": 4.869016647338867,
|
| 19665 |
+
"learning_rate": 4.913007840369829e-05,
|
| 19666 |
+
"loss": 0.037,
|
| 19667 |
+
"step": 138950
|
| 19668 |
+
},
|
| 19669 |
+
{
|
| 19670 |
+
"epoch": 17.586032388663966,
|
| 19671 |
+
"grad_norm": 3.429218292236328,
|
| 19672 |
+
"learning_rate": 4.91286342242394e-05,
|
| 19673 |
+
"loss": 0.0471,
|
| 19674 |
+
"step": 139000
|
| 19675 |
+
},
|
| 19676 |
+
{
|
| 19677 |
+
"epoch": 17.59235829959514,
|
| 19678 |
+
"grad_norm": 7.292686939239502,
|
| 19679 |
+
"learning_rate": 4.912718886827596e-05,
|
| 19680 |
+
"loss": 0.0382,
|
| 19681 |
+
"step": 139050
|
| 19682 |
+
},
|
| 19683 |
+
{
|
| 19684 |
+
"epoch": 17.598684210526315,
|
| 19685 |
+
"grad_norm": 4.38794469833374,
|
| 19686 |
+
"learning_rate": 4.9125742335878445e-05,
|
| 19687 |
+
"loss": 0.0434,
|
| 19688 |
+
"step": 139100
|
| 19689 |
+
},
|
| 19690 |
+
{
|
| 19691 |
+
"epoch": 17.60501012145749,
|
| 19692 |
+
"grad_norm": 6.403473377227783,
|
| 19693 |
+
"learning_rate": 4.9124294627117394e-05,
|
| 19694 |
+
"loss": 0.0424,
|
| 19695 |
+
"step": 139150
|
| 19696 |
+
},
|
| 19697 |
+
{
|
| 19698 |
+
"epoch": 17.611336032388664,
|
| 19699 |
+
"grad_norm": 1.097304105758667,
|
| 19700 |
+
"learning_rate": 4.912284574206339e-05,
|
| 19701 |
+
"loss": 0.0358,
|
| 19702 |
+
"step": 139200
|
| 19703 |
+
},
|
| 19704 |
+
{
|
| 19705 |
+
"epoch": 17.61766194331984,
|
| 19706 |
+
"grad_norm": 5.368671894073486,
|
| 19707 |
+
"learning_rate": 4.912139568078709e-05,
|
| 19708 |
+
"loss": 0.0438,
|
| 19709 |
+
"step": 139250
|
| 19710 |
+
},
|
| 19711 |
+
{
|
| 19712 |
+
"epoch": 17.623987854251013,
|
| 19713 |
+
"grad_norm": 3.4368090629577637,
|
| 19714 |
+
"learning_rate": 4.911994444335919e-05,
|
| 19715 |
+
"loss": 0.0415,
|
| 19716 |
+
"step": 139300
|
| 19717 |
+
},
|
| 19718 |
+
{
|
| 19719 |
+
"epoch": 17.630313765182187,
|
| 19720 |
+
"grad_norm": 4.823304176330566,
|
| 19721 |
+
"learning_rate": 4.911849202985045e-05,
|
| 19722 |
+
"loss": 0.0352,
|
| 19723 |
+
"step": 139350
|
| 19724 |
+
},
|
| 19725 |
+
{
|
| 19726 |
+
"epoch": 17.63663967611336,
|
| 19727 |
+
"grad_norm": 5.125885486602783,
|
| 19728 |
+
"learning_rate": 4.91170384403317e-05,
|
| 19729 |
+
"loss": 0.0459,
|
| 19730 |
+
"step": 139400
|
| 19731 |
+
},
|
| 19732 |
+
{
|
| 19733 |
+
"epoch": 17.642965587044536,
|
| 19734 |
+
"grad_norm": 6.066320896148682,
|
| 19735 |
+
"learning_rate": 4.91155836748738e-05,
|
| 19736 |
+
"loss": 0.037,
|
| 19737 |
+
"step": 139450
|
| 19738 |
+
},
|
| 19739 |
+
{
|
| 19740 |
+
"epoch": 17.64929149797571,
|
| 19741 |
+
"grad_norm": 4.85833215713501,
|
| 19742 |
+
"learning_rate": 4.91141277335477e-05,
|
| 19743 |
+
"loss": 0.0391,
|
| 19744 |
+
"step": 139500
|
| 19745 |
+
},
|
| 19746 |
+
{
|
| 19747 |
+
"epoch": 17.65561740890688,
|
| 19748 |
+
"grad_norm": 5.783332824707031,
|
| 19749 |
+
"learning_rate": 4.911267061642438e-05,
|
| 19750 |
+
"loss": 0.0416,
|
| 19751 |
+
"step": 139550
|
| 19752 |
+
},
|
| 19753 |
+
{
|
| 19754 |
+
"epoch": 17.661943319838056,
|
| 19755 |
+
"grad_norm": 4.913445472717285,
|
| 19756 |
+
"learning_rate": 4.91112123235749e-05,
|
| 19757 |
+
"loss": 0.0398,
|
| 19758 |
+
"step": 139600
|
| 19759 |
+
},
|
| 19760 |
+
{
|
| 19761 |
+
"epoch": 17.66826923076923,
|
| 19762 |
+
"grad_norm": 12.016042709350586,
|
| 19763 |
+
"learning_rate": 4.9109752855070345e-05,
|
| 19764 |
+
"loss": 0.0411,
|
| 19765 |
+
"step": 139650
|
| 19766 |
+
},
|
| 19767 |
+
{
|
| 19768 |
+
"epoch": 17.674595141700404,
|
| 19769 |
+
"grad_norm": 7.794407844543457,
|
| 19770 |
+
"learning_rate": 4.9108292210981904e-05,
|
| 19771 |
+
"loss": 0.0438,
|
| 19772 |
+
"step": 139700
|
| 19773 |
+
},
|
| 19774 |
+
{
|
| 19775 |
+
"epoch": 17.68092105263158,
|
| 19776 |
+
"grad_norm": 3.418745756149292,
|
| 19777 |
+
"learning_rate": 4.9106830391380787e-05,
|
| 19778 |
+
"loss": 0.0357,
|
| 19779 |
+
"step": 139750
|
| 19780 |
+
},
|
| 19781 |
+
{
|
| 19782 |
+
"epoch": 17.687246963562753,
|
| 19783 |
+
"grad_norm": 5.839387893676758,
|
| 19784 |
+
"learning_rate": 4.9105367396338265e-05,
|
| 19785 |
+
"loss": 0.0412,
|
| 19786 |
+
"step": 139800
|
| 19787 |
+
},
|
| 19788 |
+
{
|
| 19789 |
+
"epoch": 17.693572874493928,
|
| 19790 |
+
"grad_norm": 10.380568504333496,
|
| 19791 |
+
"learning_rate": 4.910390322592569e-05,
|
| 19792 |
+
"loss": 0.0517,
|
| 19793 |
+
"step": 139850
|
| 19794 |
+
},
|
| 19795 |
+
{
|
| 19796 |
+
"epoch": 17.699898785425102,
|
| 19797 |
+
"grad_norm": 3.1316144466400146,
|
| 19798 |
+
"learning_rate": 4.910243788021443e-05,
|
| 19799 |
+
"loss": 0.0399,
|
| 19800 |
+
"step": 139900
|
| 19801 |
+
},
|
| 19802 |
+
{
|
| 19803 |
+
"epoch": 17.706224696356276,
|
| 19804 |
+
"grad_norm": 1.631729245185852,
|
| 19805 |
+
"learning_rate": 4.910097135927595e-05,
|
| 19806 |
+
"loss": 0.0453,
|
| 19807 |
+
"step": 139950
|
| 19808 |
+
},
|
| 19809 |
+
{
|
| 19810 |
+
"epoch": 17.71255060728745,
|
| 19811 |
+
"grad_norm": 6.041396617889404,
|
| 19812 |
+
"learning_rate": 4.909950366318176e-05,
|
| 19813 |
+
"loss": 0.0439,
|
| 19814 |
+
"step": 140000
|
| 19815 |
+
},
|
| 19816 |
+
{
|
| 19817 |
+
"epoch": 17.718876518218625,
|
| 19818 |
+
"grad_norm": 7.771120071411133,
|
| 19819 |
+
"learning_rate": 4.909803479200341e-05,
|
| 19820 |
+
"loss": 0.0442,
|
| 19821 |
+
"step": 140050
|
| 19822 |
+
},
|
| 19823 |
+
{
|
| 19824 |
+
"epoch": 17.725202429149796,
|
| 19825 |
+
"grad_norm": 4.687602996826172,
|
| 19826 |
+
"learning_rate": 4.9096564745812546e-05,
|
| 19827 |
+
"loss": 0.0452,
|
| 19828 |
+
"step": 140100
|
| 19829 |
+
},
|
| 19830 |
+
{
|
| 19831 |
+
"epoch": 17.73152834008097,
|
| 19832 |
+
"grad_norm": 3.5146102905273438,
|
| 19833 |
+
"learning_rate": 4.909509352468082e-05,
|
| 19834 |
+
"loss": 0.0426,
|
| 19835 |
+
"step": 140150
|
| 19836 |
+
},
|
| 19837 |
+
{
|
| 19838 |
+
"epoch": 17.737854251012145,
|
| 19839 |
+
"grad_norm": 0.9282062649726868,
|
| 19840 |
+
"learning_rate": 4.909362112867999e-05,
|
| 19841 |
+
"loss": 0.0359,
|
| 19842 |
+
"step": 140200
|
| 19843 |
+
},
|
| 19844 |
+
{
|
| 19845 |
+
"epoch": 17.74418016194332,
|
| 19846 |
+
"grad_norm": 1.4117069244384766,
|
| 19847 |
+
"learning_rate": 4.909214755788184e-05,
|
| 19848 |
+
"loss": 0.0355,
|
| 19849 |
+
"step": 140250
|
| 19850 |
+
},
|
| 19851 |
+
{
|
| 19852 |
+
"epoch": 17.750506072874494,
|
| 19853 |
+
"grad_norm": 5.453182220458984,
|
| 19854 |
+
"learning_rate": 4.9090672812358214e-05,
|
| 19855 |
+
"loss": 0.0337,
|
| 19856 |
+
"step": 140300
|
| 19857 |
+
},
|
| 19858 |
+
{
|
| 19859 |
+
"epoch": 17.756831983805668,
|
| 19860 |
+
"grad_norm": 5.728852272033691,
|
| 19861 |
+
"learning_rate": 4.908919689218103e-05,
|
| 19862 |
+
"loss": 0.0438,
|
| 19863 |
+
"step": 140350
|
| 19864 |
+
},
|
| 19865 |
+
{
|
| 19866 |
+
"epoch": 17.763157894736842,
|
| 19867 |
+
"grad_norm": 0.698963463306427,
|
| 19868 |
+
"learning_rate": 4.908771979742225e-05,
|
| 19869 |
+
"loss": 0.0397,
|
| 19870 |
+
"step": 140400
|
| 19871 |
+
},
|
| 19872 |
+
{
|
| 19873 |
+
"epoch": 17.769483805668017,
|
| 19874 |
+
"grad_norm": 3.1075263023376465,
|
| 19875 |
+
"learning_rate": 4.90862415281539e-05,
|
| 19876 |
+
"loss": 0.0349,
|
| 19877 |
+
"step": 140450
|
| 19878 |
+
},
|
| 19879 |
+
{
|
| 19880 |
+
"epoch": 17.77580971659919,
|
| 19881 |
+
"grad_norm": 8.731734275817871,
|
| 19882 |
+
"learning_rate": 4.908476208444806e-05,
|
| 19883 |
+
"loss": 0.0388,
|
| 19884 |
+
"step": 140500
|
| 19885 |
+
},
|
| 19886 |
+
{
|
| 19887 |
+
"epoch": 17.782135627530366,
|
| 19888 |
+
"grad_norm": 6.192546844482422,
|
| 19889 |
+
"learning_rate": 4.908328146637685e-05,
|
| 19890 |
+
"loss": 0.045,
|
| 19891 |
+
"step": 140550
|
| 19892 |
+
},
|
| 19893 |
+
{
|
| 19894 |
+
"epoch": 17.78846153846154,
|
| 19895 |
+
"grad_norm": 4.404603481292725,
|
| 19896 |
+
"learning_rate": 4.9081799674012494e-05,
|
| 19897 |
+
"loss": 0.0406,
|
| 19898 |
+
"step": 140600
|
| 19899 |
+
},
|
| 19900 |
+
{
|
| 19901 |
+
"epoch": 17.79478744939271,
|
| 19902 |
+
"grad_norm": 4.874458312988281,
|
| 19903 |
+
"learning_rate": 4.908031670742722e-05,
|
| 19904 |
+
"loss": 0.04,
|
| 19905 |
+
"step": 140650
|
| 19906 |
+
},
|
| 19907 |
+
{
|
| 19908 |
+
"epoch": 17.801113360323885,
|
| 19909 |
+
"grad_norm": 2.6905007362365723,
|
| 19910 |
+
"learning_rate": 4.907883256669335e-05,
|
| 19911 |
+
"loss": 0.034,
|
| 19912 |
+
"step": 140700
|
| 19913 |
+
},
|
| 19914 |
+
{
|
| 19915 |
+
"epoch": 17.80743927125506,
|
| 19916 |
+
"grad_norm": 8.366782188415527,
|
| 19917 |
+
"learning_rate": 4.9077347251883245e-05,
|
| 19918 |
+
"loss": 0.0393,
|
| 19919 |
+
"step": 140750
|
| 19920 |
+
},
|
| 19921 |
+
{
|
| 19922 |
+
"epoch": 17.813765182186234,
|
| 19923 |
+
"grad_norm": 5.466225624084473,
|
| 19924 |
+
"learning_rate": 4.907586076306933e-05,
|
| 19925 |
+
"loss": 0.0465,
|
| 19926 |
+
"step": 140800
|
| 19927 |
+
},
|
| 19928 |
+
{
|
| 19929 |
+
"epoch": 17.82009109311741,
|
| 19930 |
+
"grad_norm": 6.20123291015625,
|
| 19931 |
+
"learning_rate": 4.907437310032408e-05,
|
| 19932 |
+
"loss": 0.041,
|
| 19933 |
+
"step": 140850
|
| 19934 |
+
},
|
| 19935 |
+
{
|
| 19936 |
+
"epoch": 17.826417004048583,
|
| 19937 |
+
"grad_norm": 3.9706645011901855,
|
| 19938 |
+
"learning_rate": 4.907288426372004e-05,
|
| 19939 |
+
"loss": 0.039,
|
| 19940 |
+
"step": 140900
|
| 19941 |
+
},
|
| 19942 |
+
{
|
| 19943 |
+
"epoch": 17.832742914979757,
|
| 19944 |
+
"grad_norm": 4.277196407318115,
|
| 19945 |
+
"learning_rate": 4.9071394253329803e-05,
|
| 19946 |
+
"loss": 0.0399,
|
| 19947 |
+
"step": 140950
|
| 19948 |
+
},
|
| 19949 |
+
{
|
| 19950 |
+
"epoch": 17.839068825910932,
|
| 19951 |
+
"grad_norm": 4.627282619476318,
|
| 19952 |
+
"learning_rate": 4.906990306922603e-05,
|
| 19953 |
+
"loss": 0.0511,
|
| 19954 |
+
"step": 141000
|
| 19955 |
+
},
|
| 19956 |
+
{
|
| 19957 |
+
"epoch": 17.845394736842106,
|
| 19958 |
+
"grad_norm": 3.7043867111206055,
|
| 19959 |
+
"learning_rate": 4.906841071148142e-05,
|
| 19960 |
+
"loss": 0.0371,
|
| 19961 |
+
"step": 141050
|
| 19962 |
+
},
|
| 19963 |
+
{
|
| 19964 |
+
"epoch": 17.85172064777328,
|
| 19965 |
+
"grad_norm": 1.1031543016433716,
|
| 19966 |
+
"learning_rate": 4.906691718016873e-05,
|
| 19967 |
+
"loss": 0.0375,
|
| 19968 |
+
"step": 141100
|
| 19969 |
+
},
|
| 19970 |
+
{
|
| 19971 |
+
"epoch": 17.858046558704455,
|
| 19972 |
+
"grad_norm": 9.02058219909668,
|
| 19973 |
+
"learning_rate": 4.9065422475360805e-05,
|
| 19974 |
+
"loss": 0.049,
|
| 19975 |
+
"step": 141150
|
| 19976 |
+
},
|
| 19977 |
+
{
|
| 19978 |
+
"epoch": 17.864372469635626,
|
| 19979 |
+
"grad_norm": 1.082626461982727,
|
| 19980 |
+
"learning_rate": 4.9063926597130514e-05,
|
| 19981 |
+
"loss": 0.0456,
|
| 19982 |
+
"step": 141200
|
| 19983 |
+
},
|
| 19984 |
+
{
|
| 19985 |
+
"epoch": 17.8706983805668,
|
| 19986 |
+
"grad_norm": 2.285248041152954,
|
| 19987 |
+
"learning_rate": 4.9062429545550807e-05,
|
| 19988 |
+
"loss": 0.0395,
|
| 19989 |
+
"step": 141250
|
| 19990 |
+
},
|
| 19991 |
+
{
|
| 19992 |
+
"epoch": 17.877024291497975,
|
| 19993 |
+
"grad_norm": 7.9377288818359375,
|
| 19994 |
+
"learning_rate": 4.906093132069466e-05,
|
| 19995 |
+
"loss": 0.0484,
|
| 19996 |
+
"step": 141300
|
| 19997 |
+
},
|
| 19998 |
+
{
|
| 19999 |
+
"epoch": 17.88335020242915,
|
| 20000 |
+
"grad_norm": 2.8776113986968994,
|
| 20001 |
+
"learning_rate": 4.905943192263515e-05,
|
| 20002 |
+
"loss": 0.04,
|
| 20003 |
+
"step": 141350
|
| 20004 |
+
},
|
| 20005 |
+
{
|
| 20006 |
+
"epoch": 17.889676113360323,
|
| 20007 |
+
"grad_norm": 4.85302734375,
|
| 20008 |
+
"learning_rate": 4.905793135144536e-05,
|
| 20009 |
+
"loss": 0.039,
|
| 20010 |
+
"step": 141400
|
| 20011 |
+
},
|
| 20012 |
+
{
|
| 20013 |
+
"epoch": 17.896002024291498,
|
| 20014 |
+
"grad_norm": 3.401076316833496,
|
| 20015 |
+
"learning_rate": 4.905642960719848e-05,
|
| 20016 |
+
"loss": 0.0359,
|
| 20017 |
+
"step": 141450
|
| 20018 |
+
},
|
| 20019 |
+
{
|
| 20020 |
+
"epoch": 17.902327935222672,
|
| 20021 |
+
"grad_norm": 2.743190288543701,
|
| 20022 |
+
"learning_rate": 4.905492668996774e-05,
|
| 20023 |
+
"loss": 0.0444,
|
| 20024 |
+
"step": 141500
|
| 20025 |
+
},
|
| 20026 |
+
{
|
| 20027 |
+
"epoch": 17.908653846153847,
|
| 20028 |
+
"grad_norm": 5.680714130401611,
|
| 20029 |
+
"learning_rate": 4.9053422599826396e-05,
|
| 20030 |
+
"loss": 0.0383,
|
| 20031 |
+
"step": 141550
|
| 20032 |
+
},
|
| 20033 |
+
{
|
| 20034 |
+
"epoch": 17.91497975708502,
|
| 20035 |
+
"grad_norm": 8.38867473602295,
|
| 20036 |
+
"learning_rate": 4.9051917336847795e-05,
|
| 20037 |
+
"loss": 0.0446,
|
| 20038 |
+
"step": 141600
|
| 20039 |
+
},
|
| 20040 |
+
{
|
| 20041 |
+
"epoch": 17.921305668016196,
|
| 20042 |
+
"grad_norm": 6.715274333953857,
|
| 20043 |
+
"learning_rate": 4.905041090110535e-05,
|
| 20044 |
+
"loss": 0.0427,
|
| 20045 |
+
"step": 141650
|
| 20046 |
+
},
|
| 20047 |
+
{
|
| 20048 |
+
"epoch": 17.92763157894737,
|
| 20049 |
+
"grad_norm": 3.834745407104492,
|
| 20050 |
+
"learning_rate": 4.9048903292672494e-05,
|
| 20051 |
+
"loss": 0.047,
|
| 20052 |
+
"step": 141700
|
| 20053 |
+
},
|
| 20054 |
+
{
|
| 20055 |
+
"epoch": 17.933957489878544,
|
| 20056 |
+
"grad_norm": 4.528051376342773,
|
| 20057 |
+
"learning_rate": 4.9047394511622745e-05,
|
| 20058 |
+
"loss": 0.0455,
|
| 20059 |
+
"step": 141750
|
| 20060 |
+
},
|
| 20061 |
+
{
|
| 20062 |
+
"epoch": 17.940283400809715,
|
| 20063 |
+
"grad_norm": 7.673669815063477,
|
| 20064 |
+
"learning_rate": 4.904588455802968e-05,
|
| 20065 |
+
"loss": 0.044,
|
| 20066 |
+
"step": 141800
|
| 20067 |
+
},
|
| 20068 |
+
{
|
| 20069 |
+
"epoch": 17.94660931174089,
|
| 20070 |
+
"grad_norm": 3.730109214782715,
|
| 20071 |
+
"learning_rate": 4.904437343196691e-05,
|
| 20072 |
+
"loss": 0.0397,
|
| 20073 |
+
"step": 141850
|
| 20074 |
+
},
|
| 20075 |
+
{
|
| 20076 |
+
"epoch": 17.952935222672064,
|
| 20077 |
+
"grad_norm": 13.706377983093262,
|
| 20078 |
+
"learning_rate": 4.9042861133508125e-05,
|
| 20079 |
+
"loss": 0.0339,
|
| 20080 |
+
"step": 141900
|
| 20081 |
+
},
|
| 20082 |
+
{
|
| 20083 |
+
"epoch": 17.95926113360324,
|
| 20084 |
+
"grad_norm": 3.2018661499023438,
|
| 20085 |
+
"learning_rate": 4.904134766272706e-05,
|
| 20086 |
+
"loss": 0.0436,
|
| 20087 |
+
"step": 141950
|
| 20088 |
+
},
|
| 20089 |
+
{
|
| 20090 |
+
"epoch": 17.965587044534413,
|
| 20091 |
+
"grad_norm": 2.287898063659668,
|
| 20092 |
+
"learning_rate": 4.90398330196975e-05,
|
| 20093 |
+
"loss": 0.0435,
|
| 20094 |
+
"step": 142000
|
| 20095 |
+
},
|
| 20096 |
+
{
|
| 20097 |
+
"epoch": 17.971912955465587,
|
| 20098 |
+
"grad_norm": 3.8661112785339355,
|
| 20099 |
+
"learning_rate": 4.9038317204493324e-05,
|
| 20100 |
+
"loss": 0.0387,
|
| 20101 |
+
"step": 142050
|
| 20102 |
+
},
|
| 20103 |
+
{
|
| 20104 |
+
"epoch": 17.97823886639676,
|
| 20105 |
+
"grad_norm": 4.093972682952881,
|
| 20106 |
+
"learning_rate": 4.903680021718843e-05,
|
| 20107 |
+
"loss": 0.0436,
|
| 20108 |
+
"step": 142100
|
| 20109 |
+
},
|
| 20110 |
+
{
|
| 20111 |
+
"epoch": 17.984564777327936,
|
| 20112 |
+
"grad_norm": 4.2356696128845215,
|
| 20113 |
+
"learning_rate": 4.903528205785678e-05,
|
| 20114 |
+
"loss": 0.0477,
|
| 20115 |
+
"step": 142150
|
| 20116 |
+
},
|
| 20117 |
+
{
|
| 20118 |
+
"epoch": 17.99089068825911,
|
| 20119 |
+
"grad_norm": 1.6700513362884521,
|
| 20120 |
+
"learning_rate": 4.903376272657241e-05,
|
| 20121 |
+
"loss": 0.0464,
|
| 20122 |
+
"step": 142200
|
| 20123 |
+
},
|
| 20124 |
+
{
|
| 20125 |
+
"epoch": 17.997216599190285,
|
| 20126 |
+
"grad_norm": 5.324566841125488,
|
| 20127 |
+
"learning_rate": 4.9032242223409395e-05,
|
| 20128 |
+
"loss": 0.0336,
|
| 20129 |
+
"step": 142250
|
| 20130 |
+
},
|
| 20131 |
+
{
|
| 20132 |
+
"epoch": 18.0,
|
| 20133 |
+
"eval_accuracy": 0.9612695826842018,
|
| 20134 |
+
"eval_f1": 0.9613590396280814,
|
| 20135 |
+
"eval_loss": 0.16303451359272003,
|
| 20136 |
+
"eval_precision": 0.961131568797803,
|
| 20137 |
+
"eval_recall": 0.961958521294083,
|
| 20138 |
+
"eval_runtime": 394.6465,
|
| 20139 |
+
"eval_samples_per_second": 640.832,
|
| 20140 |
+
"eval_steps_per_second": 5.007,
|
| 20141 |
+
"step": 142272
|
| 20142 |
+
},
|
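The record above (epoch 18.0, step 142272) is one of the per-epoch evaluation entries appended to the log_history array in this version of trainer_state.json; as a quick sanity check on the logged numbers, 640.832 samples/s ÷ 5.007 steps/s ≈ 128 samples per eval step, consistent with the train_batch_size of 128 recorded near the end of the file. A small sketch for extracting these evaluation records and ranking them by eval_f1, assuming only the structure visible in this diff; the path is again illustrative.

import json

# Illustrative path -- point this at a locally downloaded checkpoint directory.
with open("trained_model_weight/checkpoint-158080/trainer_state.json", encoding="utf-8") as f:
    state = json.load(f)

# Evaluation records are the log_history entries that carry eval_* keys.
evals = [rec for rec in state["log_history"] if "eval_f1" in rec]
for rec in sorted(evals, key=lambda r: r["eval_f1"], reverse=True)[:5]:
    print(rec["step"], round(rec["eval_f1"], 4), round(rec["eval_loss"], 4))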
| 20143 |
+
[... 158 further added log_history entries, one every 50 steps from step 142300 through step 150150 (epoch 18.00 to 19.00); each records "epoch", "grad_norm", "learning_rate", "loss", and "step", with learning_rate decaying from about 4.9031e-05 to about 4.8777e-05, grad_norm ranging from about 0.07 to 14.3, and loss staying roughly between 0.02 and 0.05 ...]
+        {
+            "epoch": 19.0,
+            "eval_accuracy": 0.9647017421768116,
+            "eval_f1": 0.9650628878310451,
+            "eval_loss": 0.14777891337871552,
+            "eval_precision": 0.9651038822009902,
+            "eval_recall": 0.9651774362311173,
+            "eval_runtime": 395.6448,
+            "eval_samples_per_second": 639.215,
+            "eval_steps_per_second": 4.994,
+            "step": 150176
+        },
[... 100 further added log_history entries, one every 50 steps from step 150200 through step 155150 (epoch 19.00 to 19.63); each records "epoch", "grad_norm", "learning_rate", "loss", and "step", with learning_rate decaying from about 4.8776e-05 to about 4.8601e-05, grad_norm ranging from about 0.05 to 13.8, and loss staying roughly between 0.02 and 0.05 ...]
+        {
+            "epoch": 19.635627530364374,
+            "grad_norm": 8.371901512145996,
"learning_rate": 4.859920268474196e-05,
|
| 21965 |
+
"loss": 0.031,
|
| 21966 |
+
"step": 155200
|
| 21967 |
+
},
|
| 21968 |
+
{
|
| 21969 |
+
"epoch": 19.641953441295545,
|
| 21970 |
+
"grad_norm": 5.111876964569092,
|
| 21971 |
+
"learning_rate": 4.8597380175336323e-05,
|
| 21972 |
+
"loss": 0.0334,
|
| 21973 |
+
"step": 155250
|
| 21974 |
+
},
|
| 21975 |
+
{
|
| 21976 |
+
"epoch": 19.64827935222672,
|
| 21977 |
+
"grad_norm": 3.333644390106201,
|
| 21978 |
+
"learning_rate": 4.859555651532992e-05,
|
| 21979 |
+
"loss": 0.0374,
|
| 21980 |
+
"step": 155300
|
| 21981 |
+
},
|
| 21982 |
+
{
|
| 21983 |
+
"epoch": 19.654605263157894,
|
| 21984 |
+
"grad_norm": 2.7073512077331543,
|
| 21985 |
+
"learning_rate": 4.8593731704811663e-05,
|
| 21986 |
+
"loss": 0.0376,
|
| 21987 |
+
"step": 155350
|
| 21988 |
+
},
|
| 21989 |
+
{
|
| 21990 |
+
"epoch": 19.660931174089068,
|
| 21991 |
+
"grad_norm": 1.0986031293869019,
|
| 21992 |
+
"learning_rate": 4.8591905743870547e-05,
|
| 21993 |
+
"loss": 0.0424,
|
| 21994 |
+
"step": 155400
|
| 21995 |
+
},
|
| 21996 |
+
{
|
| 21997 |
+
"epoch": 19.667257085020243,
|
| 21998 |
+
"grad_norm": 2.395869493484497,
|
| 21999 |
+
"learning_rate": 4.859007863259558e-05,
|
| 22000 |
+
"loss": 0.0285,
|
| 22001 |
+
"step": 155450
|
| 22002 |
+
},
|
| 22003 |
+
{
|
| 22004 |
+
"epoch": 19.673582995951417,
|
| 22005 |
+
"grad_norm": 1.7790184020996094,
|
| 22006 |
+
"learning_rate": 4.858825037107588e-05,
|
| 22007 |
+
"loss": 0.0343,
|
| 22008 |
+
"step": 155500
|
| 22009 |
+
},
|
| 22010 |
+
{
|
| 22011 |
+
"epoch": 19.67990890688259,
|
| 22012 |
+
"grad_norm": 9.456090927124023,
|
| 22013 |
+
"learning_rate": 4.858642095940057e-05,
|
| 22014 |
+
"loss": 0.0322,
|
| 22015 |
+
"step": 155550
|
| 22016 |
+
},
|
| 22017 |
+
{
|
| 22018 |
+
"epoch": 19.686234817813766,
|
| 22019 |
+
"grad_norm": 2.5802087783813477,
|
| 22020 |
+
"learning_rate": 4.8584590397658857e-05,
|
| 22021 |
+
"loss": 0.0443,
|
| 22022 |
+
"step": 155600
|
| 22023 |
+
},
|
| 22024 |
+
{
|
| 22025 |
+
"epoch": 19.69256072874494,
|
| 22026 |
+
"grad_norm": 8.980761528015137,
|
| 22027 |
+
"learning_rate": 4.858275868594e-05,
|
| 22028 |
+
"loss": 0.0288,
|
| 22029 |
+
"step": 155650
|
| 22030 |
+
},
|
| 22031 |
+
{
|
| 22032 |
+
"epoch": 19.698886639676115,
|
| 22033 |
+
"grad_norm": 7.263606548309326,
|
| 22034 |
+
"learning_rate": 4.858092582433332e-05,
|
| 22035 |
+
"loss": 0.0401,
|
| 22036 |
+
"step": 155700
|
| 22037 |
+
},
|
| 22038 |
+
{
|
| 22039 |
+
"epoch": 19.70521255060729,
|
| 22040 |
+
"grad_norm": 2.930217981338501,
|
| 22041 |
+
"learning_rate": 4.857909181292818e-05,
|
| 22042 |
+
"loss": 0.0366,
|
| 22043 |
+
"step": 155750
|
| 22044 |
+
},
|
| 22045 |
+
{
|
| 22046 |
+
"epoch": 19.71153846153846,
|
| 22047 |
+
"grad_norm": 5.981229782104492,
|
| 22048 |
+
"learning_rate": 4.8577256651814004e-05,
|
| 22049 |
+
"loss": 0.0299,
|
| 22050 |
+
"step": 155800
|
| 22051 |
+
},
|
| 22052 |
+
{
|
| 22053 |
+
"epoch": 19.717864372469634,
|
| 22054 |
+
"grad_norm": 3.564768075942993,
|
| 22055 |
+
"learning_rate": 4.857542034108028e-05,
|
| 22056 |
+
"loss": 0.035,
|
| 22057 |
+
"step": 155850
|
| 22058 |
+
},
|
| 22059 |
+
{
|
| 22060 |
+
"epoch": 19.72419028340081,
|
| 22061 |
+
"grad_norm": 5.352268218994141,
|
| 22062 |
+
"learning_rate": 4.8573582880816534e-05,
|
| 22063 |
+
"loss": 0.0354,
|
| 22064 |
+
"step": 155900
|
| 22065 |
+
},
|
| 22066 |
+
{
|
| 22067 |
+
"epoch": 19.730516194331983,
|
| 22068 |
+
"grad_norm": 7.472506046295166,
|
| 22069 |
+
"learning_rate": 4.857174427111237e-05,
|
| 22070 |
+
"loss": 0.0277,
|
| 22071 |
+
"step": 155950
|
| 22072 |
+
},
|
| 22073 |
+
{
|
| 22074 |
+
"epoch": 19.736842105263158,
|
| 22075 |
+
"grad_norm": 6.900599956512451,
|
| 22076 |
+
"learning_rate": 4.8569904512057446e-05,
|
| 22077 |
+
"loss": 0.036,
|
| 22078 |
+
"step": 156000
|
| 22079 |
+
},
|
| 22080 |
+
{
|
| 22081 |
+
"epoch": 19.743168016194332,
|
| 22082 |
+
"grad_norm": 10.559762001037598,
|
| 22083 |
+
"learning_rate": 4.8568063603741455e-05,
|
| 22084 |
+
"loss": 0.0399,
|
| 22085 |
+
"step": 156050
|
| 22086 |
+
},
|
| 22087 |
+
{
|
| 22088 |
+
"epoch": 19.749493927125506,
|
| 22089 |
+
"grad_norm": 0.7040484547615051,
|
| 22090 |
+
"learning_rate": 4.856622154625416e-05,
|
| 22091 |
+
"loss": 0.0379,
|
| 22092 |
+
"step": 156100
|
| 22093 |
+
},
|
| 22094 |
+
{
|
| 22095 |
+
"epoch": 19.75581983805668,
|
| 22096 |
+
"grad_norm": 6.720038890838623,
|
| 22097 |
+
"learning_rate": 4.856437833968538e-05,
|
| 22098 |
+
"loss": 0.0397,
|
| 22099 |
+
"step": 156150
|
| 22100 |
+
},
|
| 22101 |
+
{
|
| 22102 |
+
"epoch": 19.762145748987855,
|
| 22103 |
+
"grad_norm": 3.7190515995025635,
|
| 22104 |
+
"learning_rate": 4.8562533984124984e-05,
|
| 22105 |
+
"loss": 0.0348,
|
| 22106 |
+
"step": 156200
|
| 22107 |
+
},
|
| 22108 |
+
{
|
| 22109 |
+
"epoch": 19.76847165991903,
|
| 22110 |
+
"grad_norm": 0.9700234532356262,
|
| 22111 |
+
"learning_rate": 4.856068847966292e-05,
|
| 22112 |
+
"loss": 0.0377,
|
| 22113 |
+
"step": 156250
|
| 22114 |
+
},
|
| 22115 |
+
{
|
| 22116 |
+
"epoch": 19.774797570850204,
|
| 22117 |
+
"grad_norm": 5.793466567993164,
|
| 22118 |
+
"learning_rate": 4.8558841826389154e-05,
|
| 22119 |
+
"loss": 0.0324,
|
| 22120 |
+
"step": 156300
|
| 22121 |
+
},
|
| 22122 |
+
{
|
| 22123 |
+
"epoch": 19.781123481781375,
|
| 22124 |
+
"grad_norm": 3.131021499633789,
|
| 22125 |
+
"learning_rate": 4.855699402439374e-05,
|
| 22126 |
+
"loss": 0.0296,
|
| 22127 |
+
"step": 156350
|
| 22128 |
+
},
|
| 22129 |
+
{
|
| 22130 |
+
"epoch": 19.78744939271255,
|
| 22131 |
+
"grad_norm": Infinity,
|
| 22132 |
+
"learning_rate": 4.8555145073766787e-05,
|
| 22133 |
+
"loss": 0.039,
|
| 22134 |
+
"step": 156400
|
| 22135 |
+
},
|
| 22136 |
+
{
|
| 22137 |
+
"epoch": 19.793775303643724,
|
| 22138 |
+
"grad_norm": 5.852443695068359,
|
| 22139 |
+
"learning_rate": 4.855329497459842e-05,
|
| 22140 |
+
"loss": 0.0402,
|
| 22141 |
+
"step": 156450
|
| 22142 |
+
},
|
| 22143 |
+
{
|
| 22144 |
+
"epoch": 19.800101214574898,
|
| 22145 |
+
"grad_norm": 7.113508701324463,
|
| 22146 |
+
"learning_rate": 4.855144372697887e-05,
|
| 22147 |
+
"loss": 0.0393,
|
| 22148 |
+
"step": 156500
|
| 22149 |
+
},
|
| 22150 |
+
{
|
| 22151 |
+
"epoch": 19.806427125506072,
|
| 22152 |
+
"grad_norm": 8.813881874084473,
|
| 22153 |
+
"learning_rate": 4.8549591330998394e-05,
|
| 22154 |
+
"loss": 0.0376,
|
| 22155 |
+
"step": 156550
|
| 22156 |
+
},
|
| 22157 |
+
{
|
| 22158 |
+
"epoch": 19.812753036437247,
|
| 22159 |
+
"grad_norm": 3.244015693664551,
|
| 22160 |
+
"learning_rate": 4.8547737786747326e-05,
|
| 22161 |
+
"loss": 0.039,
|
| 22162 |
+
"step": 156600
|
| 22163 |
+
},
|
| 22164 |
+
{
|
| 22165 |
+
"epoch": 19.81907894736842,
|
| 22166 |
+
"grad_norm": 5.63894510269165,
|
| 22167 |
+
"learning_rate": 4.8545883094316033e-05,
|
| 22168 |
+
"loss": 0.0378,
|
| 22169 |
+
"step": 156650
|
| 22170 |
+
},
|
| 22171 |
+
{
|
| 22172 |
+
"epoch": 19.825404858299596,
|
| 22173 |
+
"grad_norm": 8.88890552520752,
|
| 22174 |
+
"learning_rate": 4.854402725379495e-05,
|
| 22175 |
+
"loss": 0.0373,
|
| 22176 |
+
"step": 156700
|
| 22177 |
+
},
|
| 22178 |
+
{
|
| 22179 |
+
"epoch": 19.83173076923077,
|
| 22180 |
+
"grad_norm": 3.98940110206604,
|
| 22181 |
+
"learning_rate": 4.8542170265274573e-05,
|
| 22182 |
+
"loss": 0.0368,
|
| 22183 |
+
"step": 156750
|
| 22184 |
+
},
|
| 22185 |
+
{
|
| 22186 |
+
"epoch": 19.838056680161944,
|
| 22187 |
+
"grad_norm": 2.761207342147827,
|
| 22188 |
+
"learning_rate": 4.854031212884545e-05,
|
| 22189 |
+
"loss": 0.0367,
|
| 22190 |
+
"step": 156800
|
| 22191 |
+
},
|
| 22192 |
+
{
|
| 22193 |
+
"epoch": 19.84438259109312,
|
| 22194 |
+
"grad_norm": 3.4558122158050537,
|
| 22195 |
+
"learning_rate": 4.853845284459817e-05,
|
| 22196 |
+
"loss": 0.0389,
|
| 22197 |
+
"step": 156850
|
| 22198 |
+
},
|
| 22199 |
+
{
|
| 22200 |
+
"epoch": 19.85070850202429,
|
| 22201 |
+
"grad_norm": 9.75803279876709,
|
| 22202 |
+
"learning_rate": 4.8536592412623396e-05,
|
| 22203 |
+
"loss": 0.0379,
|
| 22204 |
+
"step": 156900
|
| 22205 |
+
},
|
| 22206 |
+
{
|
| 22207 |
+
"epoch": 19.857034412955464,
|
| 22208 |
+
"grad_norm": 4.632531642913818,
|
| 22209 |
+
"learning_rate": 4.853473083301185e-05,
|
| 22210 |
+
"loss": 0.0369,
|
| 22211 |
+
"step": 156950
|
| 22212 |
+
},
|
| 22213 |
+
{
|
| 22214 |
+
"epoch": 19.86336032388664,
|
| 22215 |
+
"grad_norm": 4.92626953125,
|
| 22216 |
+
"learning_rate": 4.853286810585431e-05,
|
| 22217 |
+
"loss": 0.0375,
|
| 22218 |
+
"step": 157000
|
| 22219 |
+
},
|
| 22220 |
+
{
|
| 22221 |
+
"epoch": 19.869686234817813,
|
| 22222 |
+
"grad_norm": 3.236208438873291,
|
| 22223 |
+
"learning_rate": 4.853100423124157e-05,
|
| 22224 |
+
"loss": 0.0445,
|
| 22225 |
+
"step": 157050
|
| 22226 |
+
},
|
| 22227 |
+
{
|
| 22228 |
+
"epoch": 19.876012145748987,
|
| 22229 |
+
"grad_norm": 1.6738104820251465,
|
| 22230 |
+
"learning_rate": 4.852913920926455e-05,
|
| 22231 |
+
"loss": 0.0377,
|
| 22232 |
+
"step": 157100
|
| 22233 |
+
},
|
| 22234 |
+
{
|
| 22235 |
+
"epoch": 19.88233805668016,
|
| 22236 |
+
"grad_norm": 7.1409430503845215,
|
| 22237 |
+
"learning_rate": 4.852727304001415e-05,
|
| 22238 |
+
"loss": 0.0393,
|
| 22239 |
+
"step": 157150
|
| 22240 |
+
},
|
| 22241 |
+
{
|
| 22242 |
+
"epoch": 19.888663967611336,
|
| 22243 |
+
"grad_norm": 0.7441387176513672,
|
| 22244 |
+
"learning_rate": 4.85254057235814e-05,
|
| 22245 |
+
"loss": 0.0336,
|
| 22246 |
+
"step": 157200
|
| 22247 |
+
},
|
| 22248 |
+
{
|
| 22249 |
+
"epoch": 19.89498987854251,
|
| 22250 |
+
"grad_norm": 3.237637758255005,
|
| 22251 |
+
"learning_rate": 4.852353726005733e-05,
|
| 22252 |
+
"loss": 0.029,
|
| 22253 |
+
"step": 157250
|
| 22254 |
+
},
|
| 22255 |
+
{
|
| 22256 |
+
"epoch": 19.901315789473685,
|
| 22257 |
+
"grad_norm": 3.43420147895813,
|
| 22258 |
+
"learning_rate": 4.852166764953304e-05,
|
| 22259 |
+
"loss": 0.0409,
|
| 22260 |
+
"step": 157300
|
| 22261 |
+
},
|
| 22262 |
+
{
|
| 22263 |
+
"epoch": 19.90764170040486,
|
| 22264 |
+
"grad_norm": 0.425411581993103,
|
| 22265 |
+
"learning_rate": 4.851979689209971e-05,
|
| 22266 |
+
"loss": 0.0391,
|
| 22267 |
+
"step": 157350
|
| 22268 |
+
},
|
| 22269 |
+
{
|
| 22270 |
+
"epoch": 19.913967611336034,
|
| 22271 |
+
"grad_norm": 2.6192097663879395,
|
| 22272 |
+
"learning_rate": 4.851792498784854e-05,
|
| 22273 |
+
"loss": 0.0322,
|
| 22274 |
+
"step": 157400
|
| 22275 |
+
},
|
| 22276 |
+
{
|
| 22277 |
+
"epoch": 19.920293522267208,
|
| 22278 |
+
"grad_norm": 5.374281883239746,
|
| 22279 |
+
"learning_rate": 4.851605193687082e-05,
|
| 22280 |
+
"loss": 0.0364,
|
| 22281 |
+
"step": 157450
|
| 22282 |
+
},
|
| 22283 |
+
{
|
| 22284 |
+
"epoch": 19.92661943319838,
|
| 22285 |
+
"grad_norm": 7.721480369567871,
|
| 22286 |
+
"learning_rate": 4.851417773925786e-05,
|
| 22287 |
+
"loss": 0.0373,
|
| 22288 |
+
"step": 157500
|
| 22289 |
+
},
|
| 22290 |
+
{
|
| 22291 |
+
"epoch": 19.932945344129553,
|
| 22292 |
+
"grad_norm": 2.180267333984375,
|
| 22293 |
+
"learning_rate": 4.8512302395101065e-05,
|
| 22294 |
+
"loss": 0.0376,
|
| 22295 |
+
"step": 157550
|
| 22296 |
+
},
|
| 22297 |
+
{
|
| 22298 |
+
"epoch": 19.939271255060728,
|
| 22299 |
+
"grad_norm": 7.930179595947266,
|
| 22300 |
+
"learning_rate": 4.8510425904491865e-05,
|
| 22301 |
+
"loss": 0.0348,
|
| 22302 |
+
"step": 157600
|
| 22303 |
+
},
|
| 22304 |
+
{
|
| 22305 |
+
"epoch": 19.945597165991902,
|
| 22306 |
+
"grad_norm": 4.858283519744873,
|
| 22307 |
+
"learning_rate": 4.850854826752176e-05,
|
| 22308 |
+
"loss": 0.0366,
|
| 22309 |
+
"step": 157650
|
| 22310 |
+
},
|
| 22311 |
+
{
|
| 22312 |
+
"epoch": 19.951923076923077,
|
| 22313 |
+
"grad_norm": 4.0790910720825195,
|
| 22314 |
+
"learning_rate": 4.8506669484282295e-05,
|
| 22315 |
+
"loss": 0.0376,
|
| 22316 |
+
"step": 157700
|
| 22317 |
+
},
|
| 22318 |
+
{
|
| 22319 |
+
"epoch": 19.95824898785425,
|
| 22320 |
+
"grad_norm": 3.098381280899048,
|
| 22321 |
+
"learning_rate": 4.8504789554865094e-05,
|
| 22322 |
+
"loss": 0.0376,
|
| 22323 |
+
"step": 157750
|
| 22324 |
+
},
|
| 22325 |
+
{
|
| 22326 |
+
"epoch": 19.964574898785425,
|
| 22327 |
+
"grad_norm": 2.5823540687561035,
|
| 22328 |
+
"learning_rate": 4.850290847936181e-05,
|
| 22329 |
+
"loss": 0.0311,
|
| 22330 |
+
"step": 157800
|
| 22331 |
+
},
|
| 22332 |
+
{
|
| 22333 |
+
"epoch": 19.9709008097166,
|
| 22334 |
+
"grad_norm": 7.660962104797363,
|
| 22335 |
+
"learning_rate": 4.850102625786417e-05,
|
| 22336 |
+
"loss": 0.035,
|
| 22337 |
+
"step": 157850
|
| 22338 |
+
},
|
| 22339 |
+
{
|
| 22340 |
+
"epoch": 19.977226720647774,
|
| 22341 |
+
"grad_norm": 5.540204048156738,
|
| 22342 |
+
"learning_rate": 4.8499142890463944e-05,
|
| 22343 |
+
"loss": 0.0397,
|
| 22344 |
+
"step": 157900
|
| 22345 |
+
},
|
| 22346 |
+
{
|
| 22347 |
+
"epoch": 19.98355263157895,
|
| 22348 |
+
"grad_norm": 0.6465005278587341,
|
| 22349 |
+
"learning_rate": 4.8497258377252976e-05,
|
| 22350 |
+
"loss": 0.0412,
|
| 22351 |
+
"step": 157950
|
| 22352 |
+
},
|
| 22353 |
+
{
|
| 22354 |
+
"epoch": 19.989878542510123,
|
| 22355 |
+
"grad_norm": 6.434248447418213,
|
| 22356 |
+
"learning_rate": 4.849537271832314e-05,
|
| 22357 |
+
"loss": 0.0335,
|
| 22358 |
+
"step": 158000
|
| 22359 |
+
},
|
| 22360 |
+
{
|
| 22361 |
+
"epoch": 19.996204453441294,
|
| 22362 |
+
"grad_norm": 4.138142108917236,
|
| 22363 |
+
"learning_rate": 4.849348591376639e-05,
|
| 22364 |
+
"loss": 0.0304,
|
| 22365 |
+
"step": 158050
|
| 22366 |
+
},
|
| 22367 |
+
{
|
| 22368 |
+
"epoch": 20.0,
|
| 22369 |
+
"eval_accuracy": 0.962463721125179,
|
| 22370 |
+
"eval_f1": 0.9629053160719874,
|
| 22371 |
+
"eval_loss": 0.15447956323623657,
|
| 22372 |
+
"eval_precision": 0.9632749556569759,
|
| 22373 |
+
"eval_recall": 0.9628388534195883,
|
| 22374 |
+
"eval_runtime": 395.1222,
|
| 22375 |
+
"eval_samples_per_second": 640.06,
|
| 22376 |
+
"eval_steps_per_second": 5.001,
|
| 22377 |
+
"step": 158080
|
| 22378 |
}
|
| 22379 |
],
|
| 22380 |
"logging_steps": 50,
|
|
|
|
| 22389 |
"early_stopping_threshold": 0.0
|
| 22390 |
},
|
| 22391 |
"attributes": {
|
| 22392 |
+
"early_stopping_patience_counter": 1
|
| 22393 |
}
|
| 22394 |
},
|
| 22395 |
"TrainerControl": {
|
|
|
|
| 22403 |
"attributes": {}
|
| 22404 |
}
|
| 22405 |
},
|
| 22406 |
+
"total_flos": 5.099647448326135e+18,
|
| 22407 |
"train_batch_size": 128,
|
| 22408 |
"trial_name": null,
|
| 22409 |
"trial_params": null
|
trained_model_weight/{checkpoint-142272 → checkpoint-158080}/training_args.bin
RENAMED
|
File without changes
|
trained_model_weight/{checkpoint-142272 → checkpoint-158080}/vocab.json
RENAMED
|
File without changes
|
trained_model_weight/logs/events.out.tfevents.1766505359.38224b27a3cf.4653.0
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:797385f7462566a35b79e1aeb02b3fc6b916cb50154c78ad205929ac4c2c8730
|
| 3 |
+
size 704728
|
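Note: the updated checkpoint-158080/trainer_state.json above ends with the epoch-20 evaluation entry (eval_accuracy ≈ 0.9625, eval_f1 ≈ 0.9629). Below is a minimal sketch of reading those numbers back out of the checkpoint, assuming the repository has been downloaded locally and that the file follows the standard Hugging Face Trainer layout with a top-level "log_history" list; the local path is illustrative, not part of this commit.

```python
import json
from pathlib import Path

# Illustrative local path; adjust to wherever the checkpoint was downloaded.
state_path = Path("trained_model_weight/checkpoint-158080/trainer_state.json")

with state_path.open() as f:
    # Python's json module accepts the bare `Infinity` values that appear in
    # some grad_norm entries, so no special handling is needed.
    state = json.load(f)

# log_history mixes the per-50-step training logs with the per-epoch eval
# entries; the eval entries are the ones carrying eval_* keys.
eval_logs = [entry for entry in state["log_history"] if "eval_accuracy" in entry]
latest = max(eval_logs, key=lambda entry: entry["step"])

print(f"step {latest['step']} (epoch {latest['epoch']}):")
for key in ("eval_accuracy", "eval_precision", "eval_recall", "eval_f1", "eval_loss"):
    print(f"  {key}: {latest[key]:.4f}")
```

For the checkpoint in this commit the script should print the same figures logged at step 158080, which is a quick way to confirm that the uploaded trainer_state.json and model weights correspond to the intended evaluation round.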