Training in progress, step 855, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step855/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step855/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step855/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step855/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +676 -3
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1527066456
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0355df4e5063562f37d09abcb14b93955eed834c29f96e9873c1ae6867862c3c
|
| 3 |
size 1527066456
|
last-checkpoint/global_step855/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9e3946b3681927eb61dbc0943eac914d926599e3e93d2bb344c31a171fb3f88a
|
| 3 |
+
size 2303346228
|
last-checkpoint/global_step855/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:957045c26127b5ae0cf698962b6f3bea091dd1f36a574987ff44d98775cd7601
|
| 3 |
+
size 2303346228
|
last-checkpoint/global_step855/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0edccf7dfc2621373d5aad8800a44738a6fd5044232f274d3c92fe9166a15764
|
| 3 |
+
size 354242335
|
last-checkpoint/global_step855/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:01b813789517429aedbb8c970fc2103dd038913fa55a904f2f73ed1c12a4f2da
|
| 3 |
+
size 354242335
|
last-checkpoint/latest
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
global_step855
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14917
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:03593c226bb4a31df5c4e26aaac693fbb9741d071e04c601b5ac1382f878f52b
|
| 3 |
size 14917
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14917
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d987383f11815234d8954c0b84d9c5f500a84408fd66a9795cd135ccfaa9f970
|
| 3 |
size 14917
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:515697a4ba82550e0f5b33a4815d00a842066a9ee3219fde5b4a86b4c7e265ef
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 95,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -5400,6 +5400,679 @@
|
|
| 5400 |
"eval_samples_per_second": 0.578,
|
| 5401 |
"eval_steps_per_second": 0.29,
|
| 5402 |
"step": 760
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5403 |
}
|
| 5404 |
],
|
| 5405 |
"logging_steps": 1,
|
|
@@ -5419,7 +6092,7 @@
|
|
| 5419 |
"attributes": {}
|
| 5420 |
}
|
| 5421 |
},
|
| 5422 |
-
"total_flos": 3.
|
| 5423 |
"train_batch_size": 1,
|
| 5424 |
"trial_name": null,
|
| 5425 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.9054805401111994,
|
| 6 |
"eval_steps": 95,
|
| 7 |
+
"global_step": 855,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 5400 |
"eval_samples_per_second": 0.578,
|
| 5401 |
"eval_steps_per_second": 0.29,
|
| 5402 |
"step": 760
|
| 5403 |
+
},
|
| 5404 |
+
{
|
| 5405 |
+
"epoch": 0.8059306327773366,
|
| 5406 |
+
"grad_norm": 0.6803131636311378,
|
| 5407 |
+
"learning_rate": 7.140070230198985e-06,
|
| 5408 |
+
"loss": 1.8325,
|
| 5409 |
+
"step": 761
|
| 5410 |
+
},
|
| 5411 |
+
{
|
| 5412 |
+
"epoch": 0.806989674344718,
|
| 5413 |
+
"grad_norm": 0.5797389494956084,
|
| 5414 |
+
"learning_rate": 7.126076742364918e-06,
|
| 5415 |
+
"loss": 1.9813,
|
| 5416 |
+
"step": 762
|
| 5417 |
+
},
|
| 5418 |
+
{
|
| 5419 |
+
"epoch": 0.8080487159120996,
|
| 5420 |
+
"grad_norm": 0.44784413151847136,
|
| 5421 |
+
"learning_rate": 7.111984282907661e-06,
|
| 5422 |
+
"loss": 2.3973,
|
| 5423 |
+
"step": 763
|
| 5424 |
+
},
|
| 5425 |
+
{
|
| 5426 |
+
"epoch": 0.809107757479481,
|
| 5427 |
+
"grad_norm": 1.5430000937697363,
|
| 5428 |
+
"learning_rate": 7.097791798107255e-06,
|
| 5429 |
+
"loss": 2.147,
|
| 5430 |
+
"step": 764
|
| 5431 |
+
},
|
| 5432 |
+
{
|
| 5433 |
+
"epoch": 0.8101667990468626,
|
| 5434 |
+
"grad_norm": 0.5136408504852492,
|
| 5435 |
+
"learning_rate": 7.083498219232293e-06,
|
| 5436 |
+
"loss": 1.848,
|
| 5437 |
+
"step": 765
|
| 5438 |
+
},
|
| 5439 |
+
{
|
| 5440 |
+
"epoch": 0.8112258406142441,
|
| 5441 |
+
"grad_norm": 0.5467920174538149,
|
| 5442 |
+
"learning_rate": 7.069102462271644e-06,
|
| 5443 |
+
"loss": 2.2383,
|
| 5444 |
+
"step": 766
|
| 5445 |
+
},
|
| 5446 |
+
{
|
| 5447 |
+
"epoch": 0.8122848821816256,
|
| 5448 |
+
"grad_norm": 0.5125300215381279,
|
| 5449 |
+
"learning_rate": 7.054603427660422e-06,
|
| 5450 |
+
"loss": 2.1579,
|
| 5451 |
+
"step": 767
|
| 5452 |
+
},
|
| 5453 |
+
{
|
| 5454 |
+
"epoch": 0.8133439237490071,
|
| 5455 |
+
"grad_norm": 0.8105607354385294,
|
| 5456 |
+
"learning_rate": 7.04e-06,
|
| 5457 |
+
"loss": 2.236,
|
| 5458 |
+
"step": 768
|
| 5459 |
+
},
|
| 5460 |
+
{
|
| 5461 |
+
"epoch": 0.8144029653163887,
|
| 5462 |
+
"grad_norm": 0.46810244908141757,
|
| 5463 |
+
"learning_rate": 7.02529104777198e-06,
|
| 5464 |
+
"loss": 2.1892,
|
| 5465 |
+
"step": 769
|
| 5466 |
+
},
|
| 5467 |
+
{
|
| 5468 |
+
"epoch": 0.8154620068837702,
|
| 5469 |
+
"grad_norm": 0.4455237449570357,
|
| 5470 |
+
"learning_rate": 7.0104754230459316e-06,
|
| 5471 |
+
"loss": 2.2361,
|
| 5472 |
+
"step": 770
|
| 5473 |
+
},
|
| 5474 |
+
{
|
| 5475 |
+
"epoch": 0.8165210484511517,
|
| 5476 |
+
"grad_norm": 0.5665884230798665,
|
| 5477 |
+
"learning_rate": 6.995551961180752e-06,
|
| 5478 |
+
"loss": 1.9058,
|
| 5479 |
+
"step": 771
|
| 5480 |
+
},
|
| 5481 |
+
{
|
| 5482 |
+
"epoch": 0.8175800900185333,
|
| 5483 |
+
"grad_norm": 0.4633746613411758,
|
| 5484 |
+
"learning_rate": 6.980519480519481e-06,
|
| 5485 |
+
"loss": 2.1833,
|
| 5486 |
+
"step": 772
|
| 5487 |
+
},
|
| 5488 |
+
{
|
| 5489 |
+
"epoch": 0.8186391315859147,
|
| 5490 |
+
"grad_norm": 0.44219293669742443,
|
| 5491 |
+
"learning_rate": 6.965376782077392e-06,
|
| 5492 |
+
"loss": 2.434,
|
| 5493 |
+
"step": 773
|
| 5494 |
+
},
|
| 5495 |
+
{
|
| 5496 |
+
"epoch": 0.8196981731532963,
|
| 5497 |
+
"grad_norm": 0.42875965911174113,
|
| 5498 |
+
"learning_rate": 6.950122649223222e-06,
|
| 5499 |
+
"loss": 2.1409,
|
| 5500 |
+
"step": 774
|
| 5501 |
+
},
|
| 5502 |
+
{
|
| 5503 |
+
"epoch": 0.8207572147206778,
|
| 5504 |
+
"grad_norm": 0.47230346181676486,
|
| 5505 |
+
"learning_rate": 6.934755847353305e-06,
|
| 5506 |
+
"loss": 2.1609,
|
| 5507 |
+
"step": 775
|
| 5508 |
+
},
|
| 5509 |
+
{
|
| 5510 |
+
"epoch": 0.8218162562880593,
|
| 5511 |
+
"grad_norm": 0.47784292161116215,
|
| 5512 |
+
"learning_rate": 6.919275123558486e-06,
|
| 5513 |
+
"loss": 2.0429,
|
| 5514 |
+
"step": 776
|
| 5515 |
+
},
|
| 5516 |
+
{
|
| 5517 |
+
"epoch": 0.8228752978554408,
|
| 5518 |
+
"grad_norm": 1.1727376426648415,
|
| 5519 |
+
"learning_rate": 6.903679206283588e-06,
|
| 5520 |
+
"loss": 1.9888,
|
| 5521 |
+
"step": 777
|
| 5522 |
+
},
|
| 5523 |
+
{
|
| 5524 |
+
"epoch": 0.8239343394228223,
|
| 5525 |
+
"grad_norm": 2.1540603636137363,
|
| 5526 |
+
"learning_rate": 6.887966804979253e-06,
|
| 5527 |
+
"loss": 2.2259,
|
| 5528 |
+
"step": 778
|
| 5529 |
+
},
|
| 5530 |
+
{
|
| 5531 |
+
"epoch": 0.8249933809902039,
|
| 5532 |
+
"grad_norm": 0.39665352588092573,
|
| 5533 |
+
"learning_rate": 6.8721366097459395e-06,
|
| 5534 |
+
"loss": 2.1907,
|
| 5535 |
+
"step": 779
|
| 5536 |
+
},
|
| 5537 |
+
{
|
| 5538 |
+
"epoch": 0.8260524225575854,
|
| 5539 |
+
"grad_norm": 0.41043373870823585,
|
| 5540 |
+
"learning_rate": 6.8561872909699e-06,
|
| 5541 |
+
"loss": 2.2407,
|
| 5542 |
+
"step": 780
|
| 5543 |
+
},
|
| 5544 |
+
{
|
| 5545 |
+
"epoch": 0.8271114641249669,
|
| 5546 |
+
"grad_norm": 0.5256052719666096,
|
| 5547 |
+
"learning_rate": 6.840117498950903e-06,
|
| 5548 |
+
"loss": 1.9323,
|
| 5549 |
+
"step": 781
|
| 5550 |
+
},
|
| 5551 |
+
{
|
| 5552 |
+
"epoch": 0.8281705056923484,
|
| 5553 |
+
"grad_norm": 0.43680474985593193,
|
| 5554 |
+
"learning_rate": 6.823925863521483e-06,
|
| 5555 |
+
"loss": 2.3057,
|
| 5556 |
+
"step": 782
|
| 5557 |
+
},
|
| 5558 |
+
{
|
| 5559 |
+
"epoch": 0.82922954725973,
|
| 5560 |
+
"grad_norm": 0.56024481376916,
|
| 5561 |
+
"learning_rate": 6.807610993657505e-06,
|
| 5562 |
+
"loss": 2.283,
|
| 5563 |
+
"step": 783
|
| 5564 |
+
},
|
| 5565 |
+
{
|
| 5566 |
+
"epoch": 0.8302885888271114,
|
| 5567 |
+
"grad_norm": 2.461245897952525,
|
| 5568 |
+
"learning_rate": 6.7911714770797965e-06,
|
| 5569 |
+
"loss": 2.2754,
|
| 5570 |
+
"step": 784
|
| 5571 |
+
},
|
| 5572 |
+
{
|
| 5573 |
+
"epoch": 0.831347630394493,
|
| 5574 |
+
"grad_norm": 0.4239538684026029,
|
| 5575 |
+
"learning_rate": 6.774605879846613e-06,
|
| 5576 |
+
"loss": 2.1561,
|
| 5577 |
+
"step": 785
|
| 5578 |
+
},
|
| 5579 |
+
{
|
| 5580 |
+
"epoch": 0.8324066719618745,
|
| 5581 |
+
"grad_norm": 0.5086795267311749,
|
| 5582 |
+
"learning_rate": 6.757912745936699e-06,
|
| 5583 |
+
"loss": 1.9561,
|
| 5584 |
+
"step": 786
|
| 5585 |
+
},
|
| 5586 |
+
{
|
| 5587 |
+
"epoch": 0.833465713529256,
|
| 5588 |
+
"grad_norm": 0.529180140080738,
|
| 5589 |
+
"learning_rate": 6.7410905968226705e-06,
|
| 5590 |
+
"loss": 2.2472,
|
| 5591 |
+
"step": 787
|
| 5592 |
+
},
|
| 5593 |
+
{
|
| 5594 |
+
"epoch": 0.8345247550966376,
|
| 5595 |
+
"grad_norm": 0.906770591412783,
|
| 5596 |
+
"learning_rate": 6.724137931034482e-06,
|
| 5597 |
+
"loss": 2.0894,
|
| 5598 |
+
"step": 788
|
| 5599 |
+
},
|
| 5600 |
+
{
|
| 5601 |
+
"epoch": 0.835583796664019,
|
| 5602 |
+
"grad_norm": 0.4763073163865911,
|
| 5603 |
+
"learning_rate": 6.707053223712678e-06,
|
| 5604 |
+
"loss": 2.1889,
|
| 5605 |
+
"step": 789
|
| 5606 |
+
},
|
| 5607 |
+
{
|
| 5608 |
+
"epoch": 0.8366428382314006,
|
| 5609 |
+
"grad_norm": 0.41043514041608953,
|
| 5610 |
+
"learning_rate": 6.689834926151174e-06,
|
| 5611 |
+
"loss": 2.2596,
|
| 5612 |
+
"step": 790
|
| 5613 |
+
},
|
| 5614 |
+
{
|
| 5615 |
+
"epoch": 0.8377018797987821,
|
| 5616 |
+
"grad_norm": 0.4875150728481836,
|
| 5617 |
+
"learning_rate": 6.672481465329265e-06,
|
| 5618 |
+
"loss": 2.171,
|
| 5619 |
+
"step": 791
|
| 5620 |
+
},
|
| 5621 |
+
{
|
| 5622 |
+
"epoch": 0.8387609213661636,
|
| 5623 |
+
"grad_norm": 0.4563054157354168,
|
| 5624 |
+
"learning_rate": 6.654991243432575e-06,
|
| 5625 |
+
"loss": 2.0479,
|
| 5626 |
+
"step": 792
|
| 5627 |
+
},
|
| 5628 |
+
{
|
| 5629 |
+
"epoch": 0.8398199629335451,
|
| 5630 |
+
"grad_norm": 0.40698891088293526,
|
| 5631 |
+
"learning_rate": 6.637362637362638e-06,
|
| 5632 |
+
"loss": 2.1311,
|
| 5633 |
+
"step": 793
|
| 5634 |
+
},
|
| 5635 |
+
{
|
| 5636 |
+
"epoch": 0.8408790045009267,
|
| 5637 |
+
"grad_norm": 1.3105970922128873,
|
| 5638 |
+
"learning_rate": 6.619593998234774e-06,
|
| 5639 |
+
"loss": 2.1572,
|
| 5640 |
+
"step": 794
|
| 5641 |
+
},
|
| 5642 |
+
{
|
| 5643 |
+
"epoch": 0.8419380460683081,
|
| 5644 |
+
"grad_norm": 0.5405021865928985,
|
| 5645 |
+
"learning_rate": 6.601683650863979e-06,
|
| 5646 |
+
"loss": 2.0705,
|
| 5647 |
+
"step": 795
|
| 5648 |
+
},
|
| 5649 |
+
{
|
| 5650 |
+
"epoch": 0.8429970876356897,
|
| 5651 |
+
"grad_norm": 0.4641185563260275,
|
| 5652 |
+
"learning_rate": 6.5836298932384346e-06,
|
| 5653 |
+
"loss": 2.2504,
|
| 5654 |
+
"step": 796
|
| 5655 |
+
},
|
| 5656 |
+
{
|
| 5657 |
+
"epoch": 0.8440561292030713,
|
| 5658 |
+
"grad_norm": 0.42421600635418305,
|
| 5659 |
+
"learning_rate": 6.565430995980349e-06,
|
| 5660 |
+
"loss": 2.2085,
|
| 5661 |
+
"step": 797
|
| 5662 |
+
},
|
| 5663 |
+
{
|
| 5664 |
+
"epoch": 0.8451151707704527,
|
| 5665 |
+
"grad_norm": 0.49354822735708614,
|
| 5666 |
+
"learning_rate": 6.547085201793723e-06,
|
| 5667 |
+
"loss": 2.1794,
|
| 5668 |
+
"step": 798
|
| 5669 |
+
},
|
| 5670 |
+
{
|
| 5671 |
+
"epoch": 0.8461742123378343,
|
| 5672 |
+
"grad_norm": 0.3921126516756366,
|
| 5673 |
+
"learning_rate": 6.528590724898695e-06,
|
| 5674 |
+
"loss": 2.227,
|
| 5675 |
+
"step": 799
|
| 5676 |
+
},
|
| 5677 |
+
{
|
| 5678 |
+
"epoch": 0.8472332539052158,
|
| 5679 |
+
"grad_norm": 0.6764827329334605,
|
| 5680 |
+
"learning_rate": 6.50994575045208e-06,
|
| 5681 |
+
"loss": 2.1332,
|
| 5682 |
+
"step": 800
|
| 5683 |
+
},
|
| 5684 |
+
{
|
| 5685 |
+
"epoch": 0.8482922954725973,
|
| 5686 |
+
"grad_norm": 0.4061658039995484,
|
| 5687 |
+
"learning_rate": 6.4911484339537e-06,
|
| 5688 |
+
"loss": 2.3831,
|
| 5689 |
+
"step": 801
|
| 5690 |
+
},
|
| 5691 |
+
{
|
| 5692 |
+
"epoch": 0.8493513370399788,
|
| 5693 |
+
"grad_norm": 0.5826701454101317,
|
| 5694 |
+
"learning_rate": 6.4721969006381045e-06,
|
| 5695 |
+
"loss": 2.3832,
|
| 5696 |
+
"step": 802
|
| 5697 |
+
},
|
| 5698 |
+
{
|
| 5699 |
+
"epoch": 0.8504103786073604,
|
| 5700 |
+
"grad_norm": 0.41435088004419374,
|
| 5701 |
+
"learning_rate": 6.453089244851258e-06,
|
| 5702 |
+
"loss": 2.2416,
|
| 5703 |
+
"step": 803
|
| 5704 |
+
},
|
| 5705 |
+
{
|
| 5706 |
+
"epoch": 0.8514694201747418,
|
| 5707 |
+
"grad_norm": 0.4150959050115928,
|
| 5708 |
+
"learning_rate": 6.433823529411766e-06,
|
| 5709 |
+
"loss": 2.0757,
|
| 5710 |
+
"step": 804
|
| 5711 |
+
},
|
| 5712 |
+
{
|
| 5713 |
+
"epoch": 0.8525284617421234,
|
| 5714 |
+
"grad_norm": 0.43352625752977775,
|
| 5715 |
+
"learning_rate": 6.414397784956161e-06,
|
| 5716 |
+
"loss": 1.9316,
|
| 5717 |
+
"step": 805
|
| 5718 |
+
},
|
| 5719 |
+
{
|
| 5720 |
+
"epoch": 0.853587503309505,
|
| 5721 |
+
"grad_norm": 0.5885629954685914,
|
| 5722 |
+
"learning_rate": 6.39481000926784e-06,
|
| 5723 |
+
"loss": 2.283,
|
| 5724 |
+
"step": 806
|
| 5725 |
+
},
|
| 5726 |
+
{
|
| 5727 |
+
"epoch": 0.8546465448768864,
|
| 5728 |
+
"grad_norm": 0.6425274615571129,
|
| 5729 |
+
"learning_rate": 6.375058166589111e-06,
|
| 5730 |
+
"loss": 2.1418,
|
| 5731 |
+
"step": 807
|
| 5732 |
+
},
|
| 5733 |
+
{
|
| 5734 |
+
"epoch": 0.855705586444268,
|
| 5735 |
+
"grad_norm": 0.3921348356319304,
|
| 5736 |
+
"learning_rate": 6.355140186915888e-06,
|
| 5737 |
+
"loss": 2.367,
|
| 5738 |
+
"step": 808
|
| 5739 |
+
},
|
| 5740 |
+
{
|
| 5741 |
+
"epoch": 0.8567646280116494,
|
| 5742 |
+
"grad_norm": 0.48187968387901975,
|
| 5743 |
+
"learning_rate": 6.3350539652745195e-06,
|
| 5744 |
+
"loss": 2.2415,
|
| 5745 |
+
"step": 809
|
| 5746 |
+
},
|
| 5747 |
+
{
|
| 5748 |
+
"epoch": 0.857823669579031,
|
| 5749 |
+
"grad_norm": 0.5163238323570051,
|
| 5750 |
+
"learning_rate": 6.3147973609802075e-06,
|
| 5751 |
+
"loss": 2.096,
|
| 5752 |
+
"step": 810
|
| 5753 |
+
},
|
| 5754 |
+
{
|
| 5755 |
+
"epoch": 0.8588827111464125,
|
| 5756 |
+
"grad_norm": 0.42537069143517753,
|
| 5757 |
+
"learning_rate": 6.294368196876479e-06,
|
| 5758 |
+
"loss": 2.2303,
|
| 5759 |
+
"step": 811
|
| 5760 |
+
},
|
| 5761 |
+
{
|
| 5762 |
+
"epoch": 0.859941752713794,
|
| 5763 |
+
"grad_norm": 0.5111353722058767,
|
| 5764 |
+
"learning_rate": 6.273764258555133e-06,
|
| 5765 |
+
"loss": 2.1343,
|
| 5766 |
+
"step": 812
|
| 5767 |
+
},
|
| 5768 |
+
{
|
| 5769 |
+
"epoch": 0.8610007942811755,
|
| 5770 |
+
"grad_norm": 0.4948365929852451,
|
| 5771 |
+
"learning_rate": 6.252983293556086e-06,
|
| 5772 |
+
"loss": 2.2779,
|
| 5773 |
+
"step": 813
|
| 5774 |
+
},
|
| 5775 |
+
{
|
| 5776 |
+
"epoch": 0.862059835848557,
|
| 5777 |
+
"grad_norm": 0.42418098706232704,
|
| 5778 |
+
"learning_rate": 6.232023010546501e-06,
|
| 5779 |
+
"loss": 2.2429,
|
| 5780 |
+
"step": 814
|
| 5781 |
+
},
|
| 5782 |
+
{
|
| 5783 |
+
"epoch": 0.8631188774159386,
|
| 5784 |
+
"grad_norm": 0.592767643023693,
|
| 5785 |
+
"learning_rate": 6.210881078478576e-06,
|
| 5786 |
+
"loss": 2.1041,
|
| 5787 |
+
"step": 815
|
| 5788 |
+
},
|
| 5789 |
+
{
|
| 5790 |
+
"epoch": 0.8641779189833201,
|
| 5791 |
+
"grad_norm": 1.111541479840316,
|
| 5792 |
+
"learning_rate": 6.189555125725339e-06,
|
| 5793 |
+
"loss": 2.2028,
|
| 5794 |
+
"step": 816
|
| 5795 |
+
},
|
| 5796 |
+
{
|
| 5797 |
+
"epoch": 0.8652369605507017,
|
| 5798 |
+
"grad_norm": 0.39821783655109405,
|
| 5799 |
+
"learning_rate": 6.168042739193783e-06,
|
| 5800 |
+
"loss": 2.2218,
|
| 5801 |
+
"step": 817
|
| 5802 |
+
},
|
| 5803 |
+
{
|
| 5804 |
+
"epoch": 0.8662960021180831,
|
| 5805 |
+
"grad_norm": 1.003472324980434,
|
| 5806 |
+
"learning_rate": 6.1463414634146346e-06,
|
| 5807 |
+
"loss": 2.1237,
|
| 5808 |
+
"step": 818
|
| 5809 |
+
},
|
| 5810 |
+
{
|
| 5811 |
+
"epoch": 0.8673550436854647,
|
| 5812 |
+
"grad_norm": 0.5140913920253216,
|
| 5813 |
+
"learning_rate": 6.124448799608036e-06,
|
| 5814 |
+
"loss": 2.2554,
|
| 5815 |
+
"step": 819
|
| 5816 |
+
},
|
| 5817 |
+
{
|
| 5818 |
+
"epoch": 0.8684140852528461,
|
| 5819 |
+
"grad_norm": 1.304653340463634,
|
| 5820 |
+
"learning_rate": 6.1023622047244104e-06,
|
| 5821 |
+
"loss": 2.1231,
|
| 5822 |
+
"step": 820
|
| 5823 |
+
},
|
| 5824 |
+
{
|
| 5825 |
+
"epoch": 0.8694731268202277,
|
| 5826 |
+
"grad_norm": 0.46644604217264457,
|
| 5827 |
+
"learning_rate": 6.080079090459714e-06,
|
| 5828 |
+
"loss": 2.068,
|
| 5829 |
+
"step": 821
|
| 5830 |
+
},
|
| 5831 |
+
{
|
| 5832 |
+
"epoch": 0.8705321683876092,
|
| 5833 |
+
"grad_norm": 0.3784392165588507,
|
| 5834 |
+
"learning_rate": 6.05759682224429e-06,
|
| 5835 |
+
"loss": 2.3086,
|
| 5836 |
+
"step": 822
|
| 5837 |
+
},
|
| 5838 |
+
{
|
| 5839 |
+
"epoch": 0.8715912099549907,
|
| 5840 |
+
"grad_norm": 0.42750467258430275,
|
| 5841 |
+
"learning_rate": 6.03491271820449e-06,
|
| 5842 |
+
"loss": 2.3433,
|
| 5843 |
+
"step": 823
|
| 5844 |
+
},
|
| 5845 |
+
{
|
| 5846 |
+
"epoch": 0.8726502515223723,
|
| 5847 |
+
"grad_norm": 0.3855344094267301,
|
| 5848 |
+
"learning_rate": 6.0120240480961935e-06,
|
| 5849 |
+
"loss": 2.2661,
|
| 5850 |
+
"step": 824
|
| 5851 |
+
},
|
| 5852 |
+
{
|
| 5853 |
+
"epoch": 0.8737092930897538,
|
| 5854 |
+
"grad_norm": 0.43412569682236263,
|
| 5855 |
+
"learning_rate": 5.9889280322093616e-06,
|
| 5856 |
+
"loss": 2.0809,
|
| 5857 |
+
"step": 825
|
| 5858 |
+
},
|
| 5859 |
+
{
|
| 5860 |
+
"epoch": 0.8747683346571353,
|
| 5861 |
+
"grad_norm": 0.4567508929265723,
|
| 5862 |
+
"learning_rate": 5.96562184024267e-06,
|
| 5863 |
+
"loss": 1.9931,
|
| 5864 |
+
"step": 826
|
| 5865 |
+
},
|
| 5866 |
+
{
|
| 5867 |
+
"epoch": 0.8758273762245168,
|
| 5868 |
+
"grad_norm": 0.7858185134588669,
|
| 5869 |
+
"learning_rate": 5.942102590147283e-06,
|
| 5870 |
+
"loss": 1.8915,
|
| 5871 |
+
"step": 827
|
| 5872 |
+
},
|
| 5873 |
+
{
|
| 5874 |
+
"epoch": 0.8768864177918984,
|
| 5875 |
+
"grad_norm": 0.4865728159534291,
|
| 5876 |
+
"learning_rate": 5.918367346938776e-06,
|
| 5877 |
+
"loss": 2.1063,
|
| 5878 |
+
"step": 828
|
| 5879 |
+
},
|
| 5880 |
+
{
|
| 5881 |
+
"epoch": 0.8779454593592798,
|
| 5882 |
+
"grad_norm": 0.7998515233126545,
|
| 5883 |
+
"learning_rate": 5.894413121476167e-06,
|
| 5884 |
+
"loss": 1.7472,
|
| 5885 |
+
"step": 829
|
| 5886 |
+
},
|
| 5887 |
+
{
|
| 5888 |
+
"epoch": 0.8790045009266614,
|
| 5889 |
+
"grad_norm": 0.4524437200975804,
|
| 5890 |
+
"learning_rate": 5.870236869207003e-06,
|
| 5891 |
+
"loss": 2.2934,
|
| 5892 |
+
"step": 830
|
| 5893 |
+
},
|
| 5894 |
+
{
|
| 5895 |
+
"epoch": 0.8800635424940428,
|
| 5896 |
+
"grad_norm": 0.4057519289307801,
|
| 5897 |
+
"learning_rate": 5.845835488877393e-06,
|
| 5898 |
+
"loss": 2.1486,
|
| 5899 |
+
"step": 831
|
| 5900 |
+
},
|
| 5901 |
+
{
|
| 5902 |
+
"epoch": 0.8811225840614244,
|
| 5903 |
+
"grad_norm": 0.4391117370907218,
|
| 5904 |
+
"learning_rate": 5.821205821205822e-06,
|
| 5905 |
+
"loss": 2.262,
|
| 5906 |
+
"step": 832
|
| 5907 |
+
},
|
| 5908 |
+
{
|
| 5909 |
+
"epoch": 0.882181625628806,
|
| 5910 |
+
"grad_norm": 0.6376172649412307,
|
| 5911 |
+
"learning_rate": 5.7963446475195825e-06,
|
| 5912 |
+
"loss": 2.155,
|
| 5913 |
+
"step": 833
|
| 5914 |
+
},
|
| 5915 |
+
{
|
| 5916 |
+
"epoch": 0.8832406671961874,
|
| 5917 |
+
"grad_norm": 0.37373198069593205,
|
| 5918 |
+
"learning_rate": 5.771248688352571e-06,
|
| 5919 |
+
"loss": 2.1126,
|
| 5920 |
+
"step": 834
|
| 5921 |
+
},
|
| 5922 |
+
{
|
| 5923 |
+
"epoch": 0.884299708763569,
|
| 5924 |
+
"grad_norm": 0.5897412582783383,
|
| 5925 |
+
"learning_rate": 5.745914602003163e-06,
|
| 5926 |
+
"loss": 2.2602,
|
| 5927 |
+
"step": 835
|
| 5928 |
+
},
|
| 5929 |
+
{
|
| 5930 |
+
"epoch": 0.8853587503309505,
|
| 5931 |
+
"grad_norm": 0.47097565886520626,
|
| 5932 |
+
"learning_rate": 5.720338983050848e-06,
|
| 5933 |
+
"loss": 1.8018,
|
| 5934 |
+
"step": 836
|
| 5935 |
+
},
|
| 5936 |
+
{
|
| 5937 |
+
"epoch": 0.886417791898332,
|
| 5938 |
+
"grad_norm": 0.4395931623754186,
|
| 5939 |
+
"learning_rate": 5.694518360830229e-06,
|
| 5940 |
+
"loss": 2.1072,
|
| 5941 |
+
"step": 837
|
| 5942 |
+
},
|
| 5943 |
+
{
|
| 5944 |
+
"epoch": 0.8874768334657135,
|
| 5945 |
+
"grad_norm": 0.46574222534352466,
|
| 5946 |
+
"learning_rate": 5.6684491978609635e-06,
|
| 5947 |
+
"loss": 2.1773,
|
| 5948 |
+
"step": 838
|
| 5949 |
+
},
|
| 5950 |
+
{
|
| 5951 |
+
"epoch": 0.8885358750330951,
|
| 5952 |
+
"grad_norm": 0.4551573251462227,
|
| 5953 |
+
"learning_rate": 5.642127888232134e-06,
|
| 5954 |
+
"loss": 2.3387,
|
| 5955 |
+
"step": 839
|
| 5956 |
+
},
|
| 5957 |
+
{
|
| 5958 |
+
"epoch": 0.8895949166004765,
|
| 5959 |
+
"grad_norm": 0.4493811713490901,
|
| 5960 |
+
"learning_rate": 5.615550755939525e-06,
|
| 5961 |
+
"loss": 2.166,
|
| 5962 |
+
"step": 840
|
| 5963 |
+
},
|
| 5964 |
+
{
|
| 5965 |
+
"epoch": 0.8906539581678581,
|
| 5966 |
+
"grad_norm": 0.39495794701015247,
|
| 5967 |
+
"learning_rate": 5.588714053174173e-06,
|
| 5968 |
+
"loss": 2.1217,
|
| 5969 |
+
"step": 841
|
| 5970 |
+
},
|
| 5971 |
+
{
|
| 5972 |
+
"epoch": 0.8917129997352397,
|
| 5973 |
+
"grad_norm": 0.5230334500510622,
|
| 5974 |
+
"learning_rate": 5.5616139585605235e-06,
|
| 5975 |
+
"loss": 2.208,
|
| 5976 |
+
"step": 842
|
| 5977 |
+
},
|
| 5978 |
+
{
|
| 5979 |
+
"epoch": 0.8927720413026211,
|
| 5980 |
+
"grad_norm": 0.5613270004933778,
|
| 5981 |
+
"learning_rate": 5.534246575342466e-06,
|
| 5982 |
+
"loss": 2.1997,
|
| 5983 |
+
"step": 843
|
| 5984 |
+
},
|
| 5985 |
+
{
|
| 5986 |
+
"epoch": 0.8938310828700027,
|
| 5987 |
+
"grad_norm": 0.5650905985537464,
|
| 5988 |
+
"learning_rate": 5.506607929515418e-06,
|
| 5989 |
+
"loss": 2.198,
|
| 5990 |
+
"step": 844
|
| 5991 |
+
},
|
| 5992 |
+
{
|
| 5993 |
+
"epoch": 0.8948901244373841,
|
| 5994 |
+
"grad_norm": 0.3895565844023344,
|
| 5995 |
+
"learning_rate": 5.4786939679026e-06,
|
| 5996 |
+
"loss": 2.1742,
|
| 5997 |
+
"step": 845
|
| 5998 |
+
},
|
| 5999 |
+
{
|
| 6000 |
+
"epoch": 0.8959491660047657,
|
| 6001 |
+
"grad_norm": 0.42224606882848037,
|
| 6002 |
+
"learning_rate": 5.450500556173527e-06,
|
| 6003 |
+
"loss": 2.0029,
|
| 6004 |
+
"step": 846
|
| 6005 |
+
},
|
| 6006 |
+
{
|
| 6007 |
+
"epoch": 0.8970082075721472,
|
| 6008 |
+
"grad_norm": 0.4528295653901916,
|
| 6009 |
+
"learning_rate": 5.422023476802684e-06,
|
| 6010 |
+
"loss": 2.2477,
|
| 6011 |
+
"step": 847
|
| 6012 |
+
},
|
| 6013 |
+
{
|
| 6014 |
+
"epoch": 0.8980672491395287,
|
| 6015 |
+
"grad_norm": 0.42318696679446294,
|
| 6016 |
+
"learning_rate": 5.393258426966292e-06,
|
| 6017 |
+
"loss": 2.2269,
|
| 6018 |
+
"step": 848
|
| 6019 |
+
},
|
| 6020 |
+
{
|
| 6021 |
+
"epoch": 0.8991262907069102,
|
| 6022 |
+
"grad_norm": 0.5825734030559547,
|
| 6023 |
+
"learning_rate": 5.36420101637493e-06,
|
| 6024 |
+
"loss": 1.9258,
|
| 6025 |
+
"step": 849
|
| 6026 |
+
},
|
| 6027 |
+
{
|
| 6028 |
+
"epoch": 0.9001853322742918,
|
| 6029 |
+
"grad_norm": 0.5182408045493935,
|
| 6030 |
+
"learning_rate": 5.334846765039727e-06,
|
| 6031 |
+
"loss": 2.3554,
|
| 6032 |
+
"step": 850
|
| 6033 |
+
},
|
| 6034 |
+
{
|
| 6035 |
+
"epoch": 0.9012443738416733,
|
| 6036 |
+
"grad_norm": 0.38168303079212984,
|
| 6037 |
+
"learning_rate": 5.305191100969766e-06,
|
| 6038 |
+
"loss": 2.21,
|
| 6039 |
+
"step": 851
|
| 6040 |
+
},
|
| 6041 |
+
{
|
| 6042 |
+
"epoch": 0.9023034154090548,
|
| 6043 |
+
"grad_norm": 0.5296996772260512,
|
| 6044 |
+
"learning_rate": 5.275229357798165e-06,
|
| 6045 |
+
"loss": 2.1481,
|
| 6046 |
+
"step": 852
|
| 6047 |
+
},
|
| 6048 |
+
{
|
| 6049 |
+
"epoch": 0.9033624569764364,
|
| 6050 |
+
"grad_norm": 0.44191291244492953,
|
| 6051 |
+
"learning_rate": 5.244956772334294e-06,
|
| 6052 |
+
"loss": 2.292,
|
| 6053 |
+
"step": 853
|
| 6054 |
+
},
|
| 6055 |
+
{
|
| 6056 |
+
"epoch": 0.9044214985438178,
|
| 6057 |
+
"grad_norm": 0.39411649769962676,
|
| 6058 |
+
"learning_rate": 5.214368482039398e-06,
|
| 6059 |
+
"loss": 2.1063,
|
| 6060 |
+
"step": 854
|
| 6061 |
+
},
|
| 6062 |
+
{
|
| 6063 |
+
"epoch": 0.9054805401111994,
|
| 6064 |
+
"grad_norm": 0.4979125672714714,
|
| 6065 |
+
"learning_rate": 5.18345952242283e-06,
|
| 6066 |
+
"loss": 2.1021,
|
| 6067 |
+
"step": 855
|
| 6068 |
+
},
|
| 6069 |
+
{
|
| 6070 |
+
"epoch": 0.9054805401111994,
|
| 6071 |
+
"eval_loss": 2.1856369972229004,
|
| 6072 |
+
"eval_runtime": 560.4973,
|
| 6073 |
+
"eval_samples_per_second": 0.58,
|
| 6074 |
+
"eval_steps_per_second": 0.291,
|
| 6075 |
+
"step": 855
|
| 6076 |
}
|
| 6077 |
],
|
| 6078 |
"logging_steps": 1,
|
|
|
|
| 6092 |
"attributes": {}
|
| 6093 |
}
|
| 6094 |
},
|
| 6095 |
+
"total_flos": 3.9599464066842624e+17,
|
| 6096 |
"train_batch_size": 1,
|
| 6097 |
"trial_name": null,
|
| 6098 |
"trial_params": null
|