Training in progress, step 153000, checkpoint
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:136c76df1e01333390cbfdb110ceb8645de402aa2f9bd1d1073c2092f0435997
 size 222485192
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:06001f506fada90ae093c72a8c5895e578d9ad3a914dc5e4d6c327451c6c0591
 size 445094091
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:703fc4033d7149e1bb71c646735942adff51d66cbb62134a1d420c80d1aea545
 size 1465
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.
+  "epoch": 1.1266814123044155,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 153000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -106408,6 +106408,706 @@
       "learning_rate": 0.00015721189632451183,
       "loss": 0.4339,
       "step": 152000
+    },
+    {
+      "epoch": 1.1226263619234866,
+      "grad_norm": 0.21575403213500977,
+      "learning_rate": 0.00015718190208754518,
+      "loss": 0.4372,
+      "step": 152010
+    },
+    {
+      "epoch": 1.1226673220283443,
+      "grad_norm": 0.21100935339927673,
+      "learning_rate": 0.00015715190940017844,
+      "loss": 0.4255,
+      "step": 152020
+    },
+    {
+      "epoch": 1.1227082821332022,
+      "grad_norm": 0.1999680995941162,
+      "learning_rate": 0.00015712191826291233,
+      "loss": 0.4168,
+      "step": 152030
+    },
+    {
+      "epoch": 1.12274924223806,
+      "grad_norm": 0.2585766315460205,
+      "learning_rate": 0.00015709192867624755,
+      "loss": 0.41,
+      "step": 152040
+    },
+    {
+      "epoch": 1.122790202342918,
+      "grad_norm": 0.2113899439573288,
+      "learning_rate": 0.00015706194064068487,
+      "loss": 0.4045,
+      "step": 152050
+    },
+    {
+      "epoch": 1.1228311624477758,
+      "grad_norm": 0.20225553214550018,
+      "learning_rate": 0.00015703195415672484,
+      "loss": 0.3972,
+      "step": 152060
+    },
+    {
+      "epoch": 1.1228721225526337,
+      "grad_norm": 0.19689704477787018,
+      "learning_rate": 0.00015700196922486807,
+      "loss": 0.385,
+      "step": 152070
+    },
+    {
+      "epoch": 1.1229130826574916,
+      "grad_norm": 0.21756672859191895,
+      "learning_rate": 0.00015697198584561524,
+      "loss": 0.3743,
+      "step": 152080
+    },
+    {
+      "epoch": 1.1229540427623494,
+      "grad_norm": 0.2084749937057495,
+      "learning_rate": 0.0001569420040194668,
+      "loss": 0.3669,
+      "step": 152090
+    },
+    {
+      "epoch": 1.1229950028672073,
+      "grad_norm": 0.20592711865901947,
+      "learning_rate": 0.00015691202374692347,
+      "loss": 0.3539,
+      "step": 152100
+    },
+    {
+      "epoch": 1.1230359629720652,
+      "grad_norm": 0.19951869547367096,
+      "learning_rate": 0.00015688204502848564,
+      "loss": 0.3473,
+      "step": 152110
+    },
+    {
+      "epoch": 1.123076923076923,
+      "grad_norm": 0.21034152805805206,
+      "learning_rate": 0.00015685206786465383,
+      "loss": 0.3277,
+      "step": 152120
+    },
+    {
+      "epoch": 1.123117883181781,
+      "grad_norm": 0.20541854202747345,
+      "learning_rate": 0.00015682209225592863,
+      "loss": 0.321,
+      "step": 152130
+    },
+    {
+      "epoch": 1.1231588432866388,
+      "grad_norm": 0.212013378739357,
+      "learning_rate": 0.00015679211820281028,
+      "loss": 0.3059,
+      "step": 152140
+    },
+    {
+      "epoch": 1.1231998033914967,
+      "grad_norm": 0.19732996821403503,
+      "learning_rate": 0.00015676214570579933,
+      "loss": 0.2932,
+      "step": 152150
+    },
+    {
+      "epoch": 1.1232407634963546,
+      "grad_norm": 0.19249044358730316,
+      "learning_rate": 0.0001567321747653961,
+      "loss": 0.2744,
+      "step": 152160
+    },
+    {
+      "epoch": 1.1232817236012125,
+      "grad_norm": 0.18894243240356445,
+      "learning_rate": 0.00015670220538210102,
+      "loss": 0.2646,
+      "step": 152170
+    },
+    {
+      "epoch": 1.1233226837060704,
+      "grad_norm": 0.20343731343746185,
+      "learning_rate": 0.00015667223755641435,
+      "loss": 0.2584,
+      "step": 152180
+    },
+    {
+      "epoch": 1.1233636438109282,
+      "grad_norm": 0.20398001372814178,
+      "learning_rate": 0.00015664227128883655,
+      "loss": 0.2443,
+      "step": 152190
+    },
+    {
+      "epoch": 1.1234046039157861,
+      "grad_norm": 0.1968417465686798,
+      "learning_rate": 0.00015661230657986784,
+      "loss": 0.2242,
+      "step": 152200
+    },
+    {
+      "epoch": 1.123445564020644,
+      "grad_norm": 0.17249369621276855,
+      "learning_rate": 0.00015658234343000836,
+      "loss": 0.2158,
+      "step": 152210
+    },
+    {
+      "epoch": 1.1234865241255019,
+      "grad_norm": 0.1990642249584198,
+      "learning_rate": 0.00015655238183975845,
+      "loss": 0.2143,
+      "step": 152220
+    },
+    {
+      "epoch": 1.1235274842303595,
+      "grad_norm": 0.19265955686569214,
+      "learning_rate": 0.0001565224218096183,
+      "loss": 0.2199,
+      "step": 152230
+    },
+    {
+      "epoch": 1.1235684443352174,
+      "grad_norm": 0.18196046352386475,
+      "learning_rate": 0.00015649246334008816,
+      "loss": 0.272,
+      "step": 152240
+    },
+    {
+      "epoch": 1.1236094044400753,
+      "grad_norm": 0.21095934510231018,
+      "learning_rate": 0.00015646250643166804,
+      "loss": 0.3358,
+      "step": 152250
+    },
+    {
+      "epoch": 1.1236503645449332,
+      "grad_norm": 0.21196311712265015,
+      "learning_rate": 0.00015643255108485822,
+      "loss": 0.3869,
+      "step": 152260
+    },
+    {
+      "epoch": 1.123691324649791,
+      "grad_norm": 0.2193775326013565,
+      "learning_rate": 0.00015640259730015883,
+      "loss": 0.4176,
+      "step": 152270
+    },
+    {
+      "epoch": 1.123732284754649,
+      "grad_norm": 0.23009201884269714,
+      "learning_rate": 0.00015637264507806976,
+      "loss": 0.4597,
+      "step": 152280
+    },
+    {
+      "epoch": 1.1237732448595068,
+      "grad_norm": 0.22631536424160004,
+      "learning_rate": 0.0001563426944190912,
+      "loss": 0.4829,
+      "step": 152290
+    },
+    {
+      "epoch": 1.1238142049643647,
+      "grad_norm": 0.22960945963859558,
+      "learning_rate": 0.00015631274532372307,
+      "loss": 0.4984,
+      "step": 152300
+    },
+    {
+      "epoch": 1.1238551650692226,
+      "grad_norm": 0.22582904994487762,
+      "learning_rate": 0.00015628279779246552,
+      "loss": 0.5136,
+      "step": 152310
+    },
+    {
+      "epoch": 1.1238961251740804,
+      "grad_norm": 0.25222963094711304,
+      "learning_rate": 0.00015625285182581844,
+      "loss": 0.5348,
+      "step": 152320
+    },
+    {
+      "epoch": 1.1239370852789383,
+      "grad_norm": 0.24137544631958008,
+      "learning_rate": 0.00015622290742428173,
+      "loss": 0.5435,
+      "step": 152330
+    },
+    {
+      "epoch": 1.1239780453837962,
+      "grad_norm": 0.2230043262243271,
+      "learning_rate": 0.00015619296458835557,
+      "loss": 0.5384,
+      "step": 152340
+    },
+    {
+      "epoch": 1.124019005488654,
+      "grad_norm": 0.23206926882266998,
+      "learning_rate": 0.0001561630233185395,
+      "loss": 0.5469,
+      "step": 152350
+    },
+    {
+      "epoch": 1.124059965593512,
+      "grad_norm": 0.2324897199869156,
+      "learning_rate": 0.00015613308361533352,
+      "loss": 0.5541,
+      "step": 152360
+    },
+    {
+      "epoch": 1.1241009256983698,
+      "grad_norm": 0.22080039978027344,
+      "learning_rate": 0.00015610314547923754,
+      "loss": 0.5526,
+      "step": 152370
+    },
+    {
+      "epoch": 1.1241418858032277,
+      "grad_norm": 0.2413584589958191,
+      "learning_rate": 0.0001560732089107513,
+      "loss": 0.5607,
+      "step": 152380
+    },
+    {
+      "epoch": 1.1241828459080856,
+      "grad_norm": 0.22935831546783447,
+      "learning_rate": 0.0001560432739103747,
+      "loss": 0.5558,
+      "step": 152390
+    },
+    {
+      "epoch": 1.1242238060129435,
+      "grad_norm": 0.24277865886688232,
+      "learning_rate": 0.00015601334047860737,
+      "loss": 0.5689,
+      "step": 152400
+    },
+    {
+      "epoch": 1.1242647661178014,
+      "grad_norm": 0.23572035133838654,
+      "learning_rate": 0.00015598340861594924,
+      "loss": 0.5617,
+      "step": 152410
+    },
+    {
+      "epoch": 1.1243057262226592,
+      "grad_norm": 0.25575461983680725,
+      "learning_rate": 0.0001559534783228998,
+      "loss": 0.5631,
+      "step": 152420
+    },
+    {
+      "epoch": 1.124346686327517,
+      "grad_norm": 0.22678613662719727,
+      "learning_rate": 0.00015592354959995884,
+      "loss": 0.5644,
+      "step": 152430
+    },
+    {
+      "epoch": 1.1243876464323748,
+      "grad_norm": 0.22008901834487915,
+      "learning_rate": 0.00015589362244762601,
+      "loss": 0.5612,
+      "step": 152440
+    },
+    {
+      "epoch": 1.1244286065372326,
+      "grad_norm": 0.23076428472995758,
+      "learning_rate": 0.00015586369686640094,
+      "loss": 0.5678,
+      "step": 152450
+    },
+    {
+      "epoch": 1.1244695666420905,
+      "grad_norm": 0.2724132835865021,
+      "learning_rate": 0.0001558337728567833,
+      "loss": 0.5601,
+      "step": 152460
+    },
+    {
+      "epoch": 1.1245105267469484,
+      "grad_norm": 0.23742075264453888,
+      "learning_rate": 0.00015580385041927253,
+      "loss": 0.5624,
+      "step": 152470
+    },
+    {
+      "epoch": 1.1245514868518063,
+      "grad_norm": 0.24539630115032196,
+      "learning_rate": 0.00015577392955436843,
+      "loss": 0.5599,
+      "step": 152480
+    },
+    {
+      "epoch": 1.1245924469566642,
+      "grad_norm": 0.24014654755592346,
+      "learning_rate": 0.00015574401026257029,
+      "loss": 0.562,
+      "step": 152490
+    },
+    {
+      "epoch": 1.124633407061522,
+      "grad_norm": 0.22930192947387695,
+      "learning_rate": 0.00015571409254437765,
+      "loss": 0.5637,
+      "step": 152500
+    },
+    {
+      "epoch": 1.12467436716638,
+      "grad_norm": 0.2182847410440445,
+      "learning_rate": 0.00015568417640029008,
+      "loss": 0.557,
+      "step": 152510
+    },
+    {
+      "epoch": 1.1247153272712378,
+      "grad_norm": 0.23275041580200195,
+      "learning_rate": 0.00015565426183080698,
+      "loss": 0.5562,
+      "step": 152520
+    },
+    {
+      "epoch": 1.1247562873760957,
+      "grad_norm": 0.24153053760528564,
+      "learning_rate": 0.00015562434883642777,
+      "loss": 0.5614,
+      "step": 152530
+    },
+    {
+      "epoch": 1.1247972474809536,
+      "grad_norm": 0.23341676592826843,
+      "learning_rate": 0.00015559443741765182,
+      "loss": 0.5611,
+      "step": 152540
+    },
+    {
+      "epoch": 1.1248382075858114,
+      "grad_norm": 0.2416403889656067,
+      "learning_rate": 0.00015556452757497868,
+      "loss": 0.5661,
+      "step": 152550
+    },
+    {
+      "epoch": 1.1248791676906693,
+      "grad_norm": 0.2251540720462799,
+      "learning_rate": 0.0001555346193089075,
+      "loss": 0.5587,
+      "step": 152560
+    },
+    {
+      "epoch": 1.1249201277955272,
+      "grad_norm": 0.23912055790424347,
+      "learning_rate": 0.00015550471261993755,
+      "loss": 0.5508,
+      "step": 152570
+    },
+    {
+      "epoch": 1.124961087900385,
+      "grad_norm": 0.24343754351139069,
+      "learning_rate": 0.00015547480750856835,
+      "loss": 0.5569,
+      "step": 152580
+    },
+    {
+      "epoch": 1.125002048005243,
+      "grad_norm": 0.2361503690481186,
+      "learning_rate": 0.0001554449039752989,
+      "loss": 0.5644,
+      "step": 152590
+    },
+    {
+      "epoch": 1.1250430081101008,
+      "grad_norm": 0.22848433256149292,
+      "learning_rate": 0.00015541500202062873,
+      "loss": 0.5584,
+      "step": 152600
+    },
+    {
+      "epoch": 1.1250839682149587,
+      "grad_norm": 0.2590723931789398,
+      "learning_rate": 0.0001553851016450568,
+      "loss": 0.566,
+      "step": 152610
+    },
+    {
+      "epoch": 1.1251249283198166,
+      "grad_norm": 0.24066004157066345,
+      "learning_rate": 0.00015535520284908253,
+      "loss": 0.5611,
+      "step": 152620
+    },
+    {
+      "epoch": 1.1251658884246742,
+      "grad_norm": 0.23049475252628326,
+      "learning_rate": 0.0001553253056332049,
+      "loss": 0.558,
+      "step": 152630
+    },
+    {
+      "epoch": 1.1252068485295323,
+      "grad_norm": 0.23786190152168274,
+      "learning_rate": 0.00015529540999792302,
+      "loss": 0.5689,
+      "step": 152640
+    },
+    {
+      "epoch": 1.12524780863439,
+      "grad_norm": 0.223163902759552,
+      "learning_rate": 0.00015526551594373616,
+      "loss": 0.5611,
+      "step": 152650
+    },
+    {
+      "epoch": 1.1252887687392479,
+      "grad_norm": 0.24785561859607697,
+      "learning_rate": 0.00015523562347114327,
+      "loss": 0.5631,
+      "step": 152660
+    },
+    {
+      "epoch": 1.1253297288441058,
+      "grad_norm": 0.22849705815315247,
+      "learning_rate": 0.00015520573258064353,
+      "loss": 0.5621,
+      "step": 152670
+    },
+    {
+      "epoch": 1.1253706889489636,
+      "grad_norm": 0.2411494106054306,
+      "learning_rate": 0.00015517584327273578,
+      "loss": 0.5631,
+      "step": 152680
+    },
+    {
+      "epoch": 1.1254116490538215,
+      "grad_norm": 0.22378501296043396,
+      "learning_rate": 0.00015514595554791928,
+      "loss": 0.5535,
+      "step": 152690
+    },
+    {
+      "epoch": 1.1254526091586794,
+      "grad_norm": 0.23733602464199066,
+      "learning_rate": 0.0001551160694066928,
+      "loss": 0.5567,
+      "step": 152700
+    },
+    {
+      "epoch": 1.1254935692635373,
+      "grad_norm": 0.26041272282600403,
+      "learning_rate": 0.00015508618484955534,
+      "loss": 0.5536,
+      "step": 152710
+    },
+    {
+      "epoch": 1.1255345293683952,
+      "grad_norm": 0.2391430139541626,
+      "learning_rate": 0.0001550563018770058,
+      "loss": 0.5605,
+      "step": 152720
+    },
+    {
+      "epoch": 1.125575489473253,
+      "grad_norm": 0.23212002217769623,
+      "learning_rate": 0.00015502642048954312,
+      "loss": 0.5648,
+      "step": 152730
+    },
+    {
+      "epoch": 1.125616449578111,
+      "grad_norm": 0.21461953222751617,
+      "learning_rate": 0.00015499654068766623,
+      "loss": 0.5558,
+      "step": 152740
+    },
+    {
+      "epoch": 1.1256574096829688,
+      "grad_norm": 0.2368936687707901,
+      "learning_rate": 0.00015496666247187386,
+      "loss": 0.5617,
+      "step": 152750
+    },
+    {
+      "epoch": 1.1256983697878267,
+      "grad_norm": 0.23703087866306305,
+      "learning_rate": 0.00015493678584266494,
+      "loss": 0.5562,
+      "step": 152760
+    },
+    {
+      "epoch": 1.1257393298926845,
+      "grad_norm": 0.22494572401046753,
+      "learning_rate": 0.00015490691080053814,
+      "loss": 0.5558,
+      "step": 152770
+    },
+    {
+      "epoch": 1.1257802899975424,
+      "grad_norm": 0.23022456467151642,
+      "learning_rate": 0.00015487703734599225,
+      "loss": 0.555,
+      "step": 152780
+    },
+    {
+      "epoch": 1.1258212501024003,
+      "grad_norm": 0.2356519103050232,
+      "learning_rate": 0.0001548471654795261,
+      "loss": 0.5531,
+      "step": 152790
+    },
+    {
+      "epoch": 1.1258622102072582,
+      "grad_norm": 0.21882906556129456,
+      "learning_rate": 0.00015481729520163825,
+      "loss": 0.5537,
+      "step": 152800
+    },
+    {
+      "epoch": 1.125903170312116,
+      "grad_norm": 0.23445099592208862,
+      "learning_rate": 0.00015478742651282756,
+      "loss": 0.5586,
+      "step": 152810
+    },
+    {
+      "epoch": 1.125944130416974,
+      "grad_norm": 0.23974089324474335,
+      "learning_rate": 0.00015475755941359254,
+      "loss": 0.5556,
+      "step": 152820
+    },
+    {
+      "epoch": 1.1259850905218318,
+      "grad_norm": 0.2311934232711792,
+      "learning_rate": 0.00015472769390443197,
+      "loss": 0.5555,
+      "step": 152830
+    },
+    {
+      "epoch": 1.1260260506266897,
+      "grad_norm": 0.22662296891212463,
+      "learning_rate": 0.00015469782998584438,
+      "loss": 0.5466,
+      "step": 152840
+    },
+    {
+      "epoch": 1.1260670107315476,
+      "grad_norm": 0.227142795920372,
+      "learning_rate": 0.00015466796765832818,
+      "loss": 0.545,
+      "step": 152850
+    },
+    {
+      "epoch": 1.1261079708364052,
+      "grad_norm": 0.22120416164398193,
+      "learning_rate": 0.00015463810692238224,
+      "loss": 0.5509,
+      "step": 152860
+    },
+    {
+      "epoch": 1.1261489309412631,
+      "grad_norm": 0.22597011923789978,
+      "learning_rate": 0.0001546082477785048,
+      "loss": 0.554,
+      "step": 152870
+    },
+    {
+      "epoch": 1.126189891046121,
+      "grad_norm": 0.24797473847866058,
+      "learning_rate": 0.00015457839022719455,
+      "loss": 0.5597,
+      "step": 152880
+    },
+    {
+      "epoch": 1.1262308511509789,
+      "grad_norm": 0.25205472111701965,
+      "learning_rate": 0.0001545485342689499,
+      "loss": 0.5519,
+      "step": 152890
+    },
+    {
+      "epoch": 1.1262718112558368,
+      "grad_norm": 0.21713557839393616,
+      "learning_rate": 0.00015451867990426933,
+      "loss": 0.5521,
+      "step": 152900
+    },
+    {
+      "epoch": 1.1263127713606946,
+      "grad_norm": 0.22947101294994354,
+      "learning_rate": 0.0001544888271336512,
+      "loss": 0.5572,
+      "step": 152910
+    },
+    {
+      "epoch": 1.1263537314655525,
+      "grad_norm": 0.22842080891132355,
+      "learning_rate": 0.00015445897595759385,
+      "loss": 0.5555,
+      "step": 152920
+    },
+    {
+      "epoch": 1.1263946915704104,
+      "grad_norm": 0.2273808866739273,
+      "learning_rate": 0.00015442912637659577,
+      "loss": 0.5538,
+      "step": 152930
+    },
+    {
+      "epoch": 1.1264356516752683,
+      "grad_norm": 0.2290753871202469,
+      "learning_rate": 0.00015439927839115526,
+      "loss": 0.5474,
+      "step": 152940
+    },
+    {
+      "epoch": 1.1264766117801261,
+      "grad_norm": 0.23517906665802002,
+      "learning_rate": 0.0001543694320017706,
+      "loss": 0.5495,
+      "step": 152950
+    },
+    {
+      "epoch": 1.126517571884984,
+      "grad_norm": 0.22325338423252106,
+      "learning_rate": 0.00015433958720894008,
+      "loss": 0.5476,
+      "step": 152960
+    },
+    {
+      "epoch": 1.126558531989842,
+      "grad_norm": 0.23649455606937408,
+      "learning_rate": 0.00015430974401316208,
+      "loss": 0.545,
+      "step": 152970
+    },
+    {
+      "epoch": 1.1265994920946998,
+      "grad_norm": 0.232147678732872,
+      "learning_rate": 0.00015427990241493467,
+      "loss": 0.554,
+      "step": 152980
+    },
+    {
+      "epoch": 1.1266404521995577,
+      "grad_norm": 0.249918133020401,
+      "learning_rate": 0.0001542500624147561,
+      "loss": 0.5385,
+      "step": 152990
+    },
+    {
+      "epoch": 1.1266814123044155,
+      "grad_norm": 0.23529894649982452,
+      "learning_rate": 0.0001542202240131246,
+      "loss": 0.5526,
+      "step": 153000
     }
   ],
   "logging_steps": 10,
@@ -106427,7 +107127,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.
+  "total_flos": 1.5183189452904727e+19,
   "train_batch_size": 2048,
   "trial_name": null,
   "trial_params": null