Training in progress, step 16300, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 340808816
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ad22cbf1fcb8571501c12f895012cee7abbe6eee1012214b48c079a3d8a18d24
|
| 3 |
size 340808816
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 173247691
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a469a6f0e3e6af8ccbac47588ca951c29baef9062f65ae65783830c17aa52835
|
| 3 |
size 173247691
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7108ab94f8617d52d176b95881a14724face9a9e73a376ff8d0ba2273d9232f4
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -113408,6 +113408,706 @@
|
|
| 113408 |
"learning_rate": 5.139489109923234e-09,
|
| 113409 |
"loss": 0.7659933567047119,
|
| 113410 |
"step": 16200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113411 |
}
|
| 113412 |
],
|
| 113413 |
"logging_steps": 1,
|
|
@@ -113422,12 +114122,12 @@
|
|
| 113422 |
"should_evaluate": false,
|
| 113423 |
"should_log": false,
|
| 113424 |
"should_save": true,
|
| 113425 |
-
"should_training_stop":
|
| 113426 |
},
|
| 113427 |
"attributes": {}
|
| 113428 |
}
|
| 113429 |
},
|
| 113430 |
-
"total_flos": 4.
|
| 113431 |
"train_batch_size": 8,
|
| 113432 |
"trial_name": null,
|
| 113433 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 20.0,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 16300,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 113408 |
"learning_rate": 5.139489109923234e-09,
|
| 113409 |
"loss": 0.7659933567047119,
|
| 113410 |
"step": 16200
|
| 113411 |
+
},
|
| 113412 |
+
{
|
| 113413 |
+
"epoch": 19.878527607361963,
|
| 113414 |
+
"grad_norm": 0.31444302201271057,
|
| 113415 |
+
"learning_rate": 5.038224271980419e-09,
|
| 113416 |
+
"loss": 0.674436092376709,
|
| 113417 |
+
"step": 16201
|
| 113418 |
+
},
|
| 113419 |
+
{
|
| 113420 |
+
"epoch": 19.879754601226995,
|
| 113421 |
+
"grad_norm": 0.2814277112483978,
|
| 113422 |
+
"learning_rate": 4.937966909657199e-09,
|
| 113423 |
+
"loss": 0.6168652772903442,
|
| 113424 |
+
"step": 16202
|
| 113425 |
+
},
|
| 113426 |
+
{
|
| 113427 |
+
"epoch": 19.880981595092024,
|
| 113428 |
+
"grad_norm": 0.29662981629371643,
|
| 113429 |
+
"learning_rate": 4.838717026997563e-09,
|
| 113430 |
+
"loss": 0.5741356015205383,
|
| 113431 |
+
"step": 16203
|
| 113432 |
+
},
|
| 113433 |
+
{
|
| 113434 |
+
"epoch": 19.882208588957056,
|
| 113435 |
+
"grad_norm": 0.25058355927467346,
|
| 113436 |
+
"learning_rate": 4.74047462800109e-09,
|
| 113437 |
+
"loss": 0.5623442530632019,
|
| 113438 |
+
"step": 16204
|
| 113439 |
+
},
|
| 113440 |
+
{
|
| 113441 |
+
"epoch": 19.883435582822084,
|
| 113442 |
+
"grad_norm": 0.3032751977443695,
|
| 113443 |
+
"learning_rate": 4.6432397166285e-09,
|
| 113444 |
+
"loss": 0.7177292108535767,
|
| 113445 |
+
"step": 16205
|
| 113446 |
+
},
|
| 113447 |
+
{
|
| 113448 |
+
"epoch": 19.884662576687116,
|
| 113449 |
+
"grad_norm": 0.2613092064857483,
|
| 113450 |
+
"learning_rate": 4.547012296796105e-09,
|
| 113451 |
+
"loss": 0.705600380897522,
|
| 113452 |
+
"step": 16206
|
| 113453 |
+
},
|
| 113454 |
+
{
|
| 113455 |
+
"epoch": 19.88588957055215,
|
| 113456 |
+
"grad_norm": 0.25566813349723816,
|
| 113457 |
+
"learning_rate": 4.4517923723841335e-09,
|
| 113458 |
+
"loss": 0.5601306557655334,
|
| 113459 |
+
"step": 16207
|
| 113460 |
+
},
|
| 113461 |
+
{
|
| 113462 |
+
"epoch": 19.887116564417177,
|
| 113463 |
+
"grad_norm": 0.23225629329681396,
|
| 113464 |
+
"learning_rate": 4.357579947233959e-09,
|
| 113465 |
+
"loss": 0.539310097694397,
|
| 113466 |
+
"step": 16208
|
| 113467 |
+
},
|
| 113468 |
+
{
|
| 113469 |
+
"epoch": 19.88834355828221,
|
| 113470 |
+
"grad_norm": 0.26849034428596497,
|
| 113471 |
+
"learning_rate": 4.26437502513699e-09,
|
| 113472 |
+
"loss": 0.6267234683036804,
|
| 113473 |
+
"step": 16209
|
| 113474 |
+
},
|
| 113475 |
+
{
|
| 113476 |
+
"epoch": 19.88957055214724,
|
| 113477 |
+
"grad_norm": 0.29642733931541443,
|
| 113478 |
+
"learning_rate": 4.172177609854111e-09,
|
| 113479 |
+
"loss": 0.7983720302581787,
|
| 113480 |
+
"step": 16210
|
| 113481 |
+
},
|
| 113482 |
+
{
|
| 113483 |
+
"epoch": 19.89079754601227,
|
| 113484 |
+
"grad_norm": 0.3397233784198761,
|
| 113485 |
+
"learning_rate": 4.080987705099016e-09,
|
| 113486 |
+
"loss": 0.4387090802192688,
|
| 113487 |
+
"step": 16211
|
| 113488 |
+
},
|
| 113489 |
+
{
|
| 113490 |
+
"epoch": 19.8920245398773,
|
| 113491 |
+
"grad_norm": 0.24321438372135162,
|
| 113492 |
+
"learning_rate": 3.990805314549318e-09,
|
| 113493 |
+
"loss": 0.44809988141059875,
|
| 113494 |
+
"step": 16212
|
| 113495 |
+
},
|
| 113496 |
+
{
|
| 113497 |
+
"epoch": 19.89325153374233,
|
| 113498 |
+
"grad_norm": 0.3020300567150116,
|
| 113499 |
+
"learning_rate": 3.901630441840998e-09,
|
| 113500 |
+
"loss": 0.6333901882171631,
|
| 113501 |
+
"step": 16213
|
| 113502 |
+
},
|
| 113503 |
+
{
|
| 113504 |
+
"epoch": 19.89447852760736,
|
| 113505 |
+
"grad_norm": 0.23879368603229523,
|
| 113506 |
+
"learning_rate": 3.8134630905656274e-09,
|
| 113507 |
+
"loss": 0.3145845830440521,
|
| 113508 |
+
"step": 16214
|
| 113509 |
+
},
|
| 113510 |
+
{
|
| 113511 |
+
"epoch": 19.895705521472394,
|
| 113512 |
+
"grad_norm": 0.281195729970932,
|
| 113513 |
+
"learning_rate": 3.726303264278696e-09,
|
| 113514 |
+
"loss": 0.4267631769180298,
|
| 113515 |
+
"step": 16215
|
| 113516 |
+
},
|
| 113517 |
+
{
|
| 113518 |
+
"epoch": 19.896932515337422,
|
| 113519 |
+
"grad_norm": 0.2915148437023163,
|
| 113520 |
+
"learning_rate": 3.6401509664912848e-09,
|
| 113521 |
+
"loss": 0.7021991014480591,
|
| 113522 |
+
"step": 16216
|
| 113523 |
+
},
|
| 113524 |
+
{
|
| 113525 |
+
"epoch": 19.898159509202454,
|
| 113526 |
+
"grad_norm": 0.2826932966709137,
|
| 113527 |
+
"learning_rate": 3.5550062006811656e-09,
|
| 113528 |
+
"loss": 0.6565602421760559,
|
| 113529 |
+
"step": 16217
|
| 113530 |
+
},
|
| 113531 |
+
{
|
| 113532 |
+
"epoch": 19.899386503067486,
|
| 113533 |
+
"grad_norm": 0.2678978443145752,
|
| 113534 |
+
"learning_rate": 3.4708689702733776e-09,
|
| 113535 |
+
"loss": 0.49907374382019043,
|
| 113536 |
+
"step": 16218
|
| 113537 |
+
},
|
| 113538 |
+
{
|
| 113539 |
+
"epoch": 19.900613496932515,
|
| 113540 |
+
"grad_norm": 0.2627774178981781,
|
| 113541 |
+
"learning_rate": 3.3877392786624273e-09,
|
| 113542 |
+
"loss": 0.5407178401947021,
|
| 113543 |
+
"step": 16219
|
| 113544 |
+
},
|
| 113545 |
+
{
|
| 113546 |
+
"epoch": 19.901840490797547,
|
| 113547 |
+
"grad_norm": 0.28935369849205017,
|
| 113548 |
+
"learning_rate": 3.3056171292011882e-09,
|
| 113549 |
+
"loss": 0.6748589873313904,
|
| 113550 |
+
"step": 16220
|
| 113551 |
+
},
|
| 113552 |
+
{
|
| 113553 |
+
"epoch": 19.903067484662575,
|
| 113554 |
+
"grad_norm": 0.27842453122138977,
|
| 113555 |
+
"learning_rate": 3.2245025251953496e-09,
|
| 113556 |
+
"loss": 0.7646443843841553,
|
| 113557 |
+
"step": 16221
|
| 113558 |
+
},
|
| 113559 |
+
{
|
| 113560 |
+
"epoch": 19.904294478527607,
|
| 113561 |
+
"grad_norm": 0.2733169496059418,
|
| 113562 |
+
"learning_rate": 3.1443954699200694e-09,
|
| 113563 |
+
"loss": 0.6724434494972229,
|
| 113564 |
+
"step": 16222
|
| 113565 |
+
},
|
| 113566 |
+
{
|
| 113567 |
+
"epoch": 19.90552147239264,
|
| 113568 |
+
"grad_norm": 0.29514604806900024,
|
| 113569 |
+
"learning_rate": 3.06529596659777e-09,
|
| 113570 |
+
"loss": 0.5396702289581299,
|
| 113571 |
+
"step": 16223
|
| 113572 |
+
},
|
| 113573 |
+
{
|
| 113574 |
+
"epoch": 19.906748466257667,
|
| 113575 |
+
"grad_norm": 0.2790028750896454,
|
| 113576 |
+
"learning_rate": 2.987204018420342e-09,
|
| 113577 |
+
"loss": 0.736526608467102,
|
| 113578 |
+
"step": 16224
|
| 113579 |
+
},
|
| 113580 |
+
{
|
| 113581 |
+
"epoch": 19.9079754601227,
|
| 113582 |
+
"grad_norm": 0.2966996729373932,
|
| 113583 |
+
"learning_rate": 2.9101196285352684e-09,
|
| 113584 |
+
"loss": 0.8032228946685791,
|
| 113585 |
+
"step": 16225
|
| 113586 |
+
},
|
| 113587 |
+
{
|
| 113588 |
+
"epoch": 19.90920245398773,
|
| 113589 |
+
"grad_norm": 0.24839043617248535,
|
| 113590 |
+
"learning_rate": 2.834042800051173e-09,
|
| 113591 |
+
"loss": 0.37957412004470825,
|
| 113592 |
+
"step": 16226
|
| 113593 |
+
},
|
| 113594 |
+
{
|
| 113595 |
+
"epoch": 19.91042944785276,
|
| 113596 |
+
"grad_norm": 0.2736720144748688,
|
| 113597 |
+
"learning_rate": 2.7589735360322724e-09,
|
| 113598 |
+
"loss": 0.7692750096321106,
|
| 113599 |
+
"step": 16227
|
| 113600 |
+
},
|
| 113601 |
+
{
|
| 113602 |
+
"epoch": 19.911656441717792,
|
| 113603 |
+
"grad_norm": 0.252001017332077,
|
| 113604 |
+
"learning_rate": 2.684911839503923e-09,
|
| 113605 |
+
"loss": 0.5340638160705566,
|
| 113606 |
+
"step": 16228
|
| 113607 |
+
},
|
| 113608 |
+
{
|
| 113609 |
+
"epoch": 19.91288343558282,
|
| 113610 |
+
"grad_norm": 0.26210591197013855,
|
| 113611 |
+
"learning_rate": 2.6118577134498503e-09,
|
| 113612 |
+
"loss": 0.5110554695129395,
|
| 113613 |
+
"step": 16229
|
| 113614 |
+
},
|
| 113615 |
+
{
|
| 113616 |
+
"epoch": 19.914110429447852,
|
| 113617 |
+
"grad_norm": 0.30329135060310364,
|
| 113618 |
+
"learning_rate": 2.5398111608204713e-09,
|
| 113619 |
+
"loss": 0.7488691806793213,
|
| 113620 |
+
"step": 16230
|
| 113621 |
+
},
|
| 113622 |
+
{
|
| 113623 |
+
"epoch": 19.915337423312884,
|
| 113624 |
+
"grad_norm": 0.28727856278419495,
|
| 113625 |
+
"learning_rate": 2.4687721845162437e-09,
|
| 113626 |
+
"loss": 0.6793022155761719,
|
| 113627 |
+
"step": 16231
|
| 113628 |
+
},
|
| 113629 |
+
{
|
| 113630 |
+
"epoch": 19.916564417177913,
|
| 113631 |
+
"grad_norm": 0.2557753026485443,
|
| 113632 |
+
"learning_rate": 2.3987407873987675e-09,
|
| 113633 |
+
"loss": 0.4969423711299896,
|
| 113634 |
+
"step": 16232
|
| 113635 |
+
},
|
| 113636 |
+
{
|
| 113637 |
+
"epoch": 19.917791411042945,
|
| 113638 |
+
"grad_norm": 0.25132516026496887,
|
| 113639 |
+
"learning_rate": 2.329716972293561e-09,
|
| 113640 |
+
"loss": 0.5655540823936462,
|
| 113641 |
+
"step": 16233
|
| 113642 |
+
},
|
| 113643 |
+
{
|
| 113644 |
+
"epoch": 19.919018404907977,
|
| 113645 |
+
"grad_norm": 0.3251507580280304,
|
| 113646 |
+
"learning_rate": 2.2617007419817313e-09,
|
| 113647 |
+
"loss": 0.7593796253204346,
|
| 113648 |
+
"step": 16234
|
| 113649 |
+
},
|
| 113650 |
+
{
|
| 113651 |
+
"epoch": 19.920245398773005,
|
| 113652 |
+
"grad_norm": 0.2656572759151459,
|
| 113653 |
+
"learning_rate": 2.1946920992027553e-09,
|
| 113654 |
+
"loss": 0.6801402568817139,
|
| 113655 |
+
"step": 16235
|
| 113656 |
+
},
|
| 113657 |
+
{
|
| 113658 |
+
"epoch": 19.921472392638037,
|
| 113659 |
+
"grad_norm": 0.26032915711402893,
|
| 113660 |
+
"learning_rate": 2.1286910466628006e-09,
|
| 113661 |
+
"loss": 0.5409698486328125,
|
| 113662 |
+
"step": 16236
|
| 113663 |
+
},
|
| 113664 |
+
{
|
| 113665 |
+
"epoch": 19.92269938650307,
|
| 113666 |
+
"grad_norm": 0.25649771094322205,
|
| 113667 |
+
"learning_rate": 2.0636975870180764e-09,
|
| 113668 |
+
"loss": 0.42548850178718567,
|
| 113669 |
+
"step": 16237
|
| 113670 |
+
},
|
| 113671 |
+
{
|
| 113672 |
+
"epoch": 19.923926380368098,
|
| 113673 |
+
"grad_norm": 0.28882917761802673,
|
| 113674 |
+
"learning_rate": 1.999711722891484e-09,
|
| 113675 |
+
"loss": 0.6696641445159912,
|
| 113676 |
+
"step": 16238
|
| 113677 |
+
},
|
| 113678 |
+
{
|
| 113679 |
+
"epoch": 19.92515337423313,
|
| 113680 |
+
"grad_norm": 0.25472167134284973,
|
| 113681 |
+
"learning_rate": 1.936733456855966e-09,
|
| 113682 |
+
"loss": 0.5762321949005127,
|
| 113683 |
+
"step": 16239
|
| 113684 |
+
},
|
| 113685 |
+
{
|
| 113686 |
+
"epoch": 19.926380368098158,
|
| 113687 |
+
"grad_norm": 0.24897408485412598,
|
| 113688 |
+
"learning_rate": 1.8747627914567077e-09,
|
| 113689 |
+
"loss": 0.606791615486145,
|
| 113690 |
+
"step": 16240
|
| 113691 |
+
},
|
| 113692 |
+
{
|
| 113693 |
+
"epoch": 19.92760736196319,
|
| 113694 |
+
"grad_norm": 0.30643516778945923,
|
| 113695 |
+
"learning_rate": 1.8137997291889363e-09,
|
| 113696 |
+
"loss": 0.605108380317688,
|
| 113697 |
+
"step": 16241
|
| 113698 |
+
},
|
| 113699 |
+
{
|
| 113700 |
+
"epoch": 19.928834355828222,
|
| 113701 |
+
"grad_norm": 0.3065126836299896,
|
| 113702 |
+
"learning_rate": 1.7538442725090198e-09,
|
| 113703 |
+
"loss": 0.6616383790969849,
|
| 113704 |
+
"step": 16242
|
| 113705 |
+
},
|
| 113706 |
+
{
|
| 113707 |
+
"epoch": 19.93006134969325,
|
| 113708 |
+
"grad_norm": 0.27115145325660706,
|
| 113709 |
+
"learning_rate": 1.694896423834469e-09,
|
| 113710 |
+
"loss": 0.7380319237709045,
|
| 113711 |
+
"step": 16243
|
| 113712 |
+
},
|
| 113713 |
+
{
|
| 113714 |
+
"epoch": 19.931288343558283,
|
| 113715 |
+
"grad_norm": 0.2318820357322693,
|
| 113716 |
+
"learning_rate": 1.6369561855411608e-09,
|
| 113717 |
+
"loss": 0.47997498512268066,
|
| 113718 |
+
"step": 16244
|
| 113719 |
+
},
|
| 113720 |
+
{
|
| 113721 |
+
"epoch": 19.93251533742331,
|
| 113722 |
+
"grad_norm": 0.2823431193828583,
|
| 113723 |
+
"learning_rate": 1.580023559966115e-09,
|
| 113724 |
+
"loss": 0.7115047574043274,
|
| 113725 |
+
"step": 16245
|
| 113726 |
+
},
|
| 113727 |
+
{
|
| 113728 |
+
"epoch": 19.933742331288343,
|
| 113729 |
+
"grad_norm": 0.30200645327568054,
|
| 113730 |
+
"learning_rate": 1.5240985493991667e-09,
|
| 113731 |
+
"loss": 0.7860561609268188,
|
| 113732 |
+
"step": 16246
|
| 113733 |
+
},
|
| 113734 |
+
{
|
| 113735 |
+
"epoch": 19.934969325153375,
|
| 113736 |
+
"grad_norm": 0.2997346520423889,
|
| 113737 |
+
"learning_rate": 1.4691811560996193e-09,
|
| 113738 |
+
"loss": 0.6527193784713745,
|
| 113739 |
+
"step": 16247
|
| 113740 |
+
},
|
| 113741 |
+
{
|
| 113742 |
+
"epoch": 19.936196319018403,
|
| 113743 |
+
"grad_norm": 0.24736203253269196,
|
| 113744 |
+
"learning_rate": 1.4152713822795927e-09,
|
| 113745 |
+
"loss": 0.5359556674957275,
|
| 113746 |
+
"step": 16248
|
| 113747 |
+
},
|
| 113748 |
+
{
|
| 113749 |
+
"epoch": 19.937423312883435,
|
| 113750 |
+
"grad_norm": 0.26944491267204285,
|
| 113751 |
+
"learning_rate": 1.362369230112348e-09,
|
| 113752 |
+
"loss": 0.5682604312896729,
|
| 113753 |
+
"step": 16249
|
| 113754 |
+
},
|
| 113755 |
+
{
|
| 113756 |
+
"epoch": 19.938650306748468,
|
| 113757 |
+
"grad_norm": 0.2371739149093628,
|
| 113758 |
+
"learning_rate": 1.3104747017295139e-09,
|
| 113759 |
+
"loss": 0.5160844326019287,
|
| 113760 |
+
"step": 16250
|
| 113761 |
+
},
|
| 113762 |
+
{
|
| 113763 |
+
"epoch": 19.939877300613496,
|
| 113764 |
+
"grad_norm": 0.28414422273635864,
|
| 113765 |
+
"learning_rate": 1.2595877992238603e-09,
|
| 113766 |
+
"loss": 0.6134630441665649,
|
| 113767 |
+
"step": 16251
|
| 113768 |
+
},
|
| 113769 |
+
{
|
| 113770 |
+
"epoch": 19.941104294478528,
|
| 113771 |
+
"grad_norm": 0.2545883059501648,
|
| 113772 |
+
"learning_rate": 1.209708524643749e-09,
|
| 113773 |
+
"loss": 0.3812911808490753,
|
| 113774 |
+
"step": 16252
|
| 113775 |
+
},
|
| 113776 |
+
{
|
| 113777 |
+
"epoch": 19.94233128834356,
|
| 113778 |
+
"grad_norm": 0.24534358084201813,
|
| 113779 |
+
"learning_rate": 1.160836880001459e-09,
|
| 113780 |
+
"loss": 0.4503448009490967,
|
| 113781 |
+
"step": 16253
|
| 113782 |
+
},
|
| 113783 |
+
{
|
| 113784 |
+
"epoch": 19.94355828220859,
|
| 113785 |
+
"grad_norm": 0.3093093931674957,
|
| 113786 |
+
"learning_rate": 1.1129728672676364e-09,
|
| 113787 |
+
"loss": 0.614890456199646,
|
| 113788 |
+
"step": 16254
|
| 113789 |
+
},
|
| 113790 |
+
{
|
| 113791 |
+
"epoch": 19.94478527607362,
|
| 113792 |
+
"grad_norm": 0.2969963550567627,
|
| 113793 |
+
"learning_rate": 1.0661164883712937e-09,
|
| 113794 |
+
"loss": 0.6617600917816162,
|
| 113795 |
+
"step": 16255
|
| 113796 |
+
},
|
| 113797 |
+
{
|
| 113798 |
+
"epoch": 19.94601226993865,
|
| 113799 |
+
"grad_norm": 0.24619616568088531,
|
| 113800 |
+
"learning_rate": 1.02026774519981e-09,
|
| 113801 |
+
"loss": 0.61571204662323,
|
| 113802 |
+
"step": 16256
|
| 113803 |
+
},
|
| 113804 |
+
{
|
| 113805 |
+
"epoch": 19.94723926380368,
|
| 113806 |
+
"grad_norm": 0.2435264140367508,
|
| 113807 |
+
"learning_rate": 9.754266396017065e-10,
|
| 113808 |
+
"loss": 0.44566264748573303,
|
| 113809 |
+
"step": 16257
|
| 113810 |
+
},
|
| 113811 |
+
{
|
| 113812 |
+
"epoch": 19.948466257668713,
|
| 113813 |
+
"grad_norm": 0.290048748254776,
|
| 113814 |
+
"learning_rate": 9.315931733866467e-10,
|
| 113815 |
+
"loss": 0.7629603743553162,
|
| 113816 |
+
"step": 16258
|
| 113817 |
+
},
|
| 113818 |
+
{
|
| 113819 |
+
"epoch": 19.94969325153374,
|
| 113820 |
+
"grad_norm": 0.2462032437324524,
|
| 113821 |
+
"learning_rate": 8.887673483171099e-10,
|
| 113822 |
+
"loss": 0.3848365545272827,
|
| 113823 |
+
"step": 16259
|
| 113824 |
+
},
|
| 113825 |
+
{
|
| 113826 |
+
"epoch": 19.950920245398773,
|
| 113827 |
+
"grad_norm": 0.2963328957557678,
|
| 113828 |
+
"learning_rate": 8.469491661222684e-10,
|
| 113829 |
+
"loss": 0.7475588321685791,
|
| 113830 |
+
"step": 16260
|
| 113831 |
+
},
|
| 113832 |
+
{
|
| 113833 |
+
"epoch": 19.952147239263805,
|
| 113834 |
+
"grad_norm": 0.27099505066871643,
|
| 113835 |
+
"learning_rate": 8.06138628489661e-10,
|
| 113836 |
+
"loss": 0.49332553148269653,
|
| 113837 |
+
"step": 16261
|
| 113838 |
+
},
|
| 113839 |
+
{
|
| 113840 |
+
"epoch": 19.953374233128834,
|
| 113841 |
+
"grad_norm": 0.25060924887657166,
|
| 113842 |
+
"learning_rate": 7.663357370596425e-10,
|
| 113843 |
+
"loss": 0.5212326049804688,
|
| 113844 |
+
"step": 16262
|
| 113845 |
+
},
|
| 113846 |
+
{
|
| 113847 |
+
"epoch": 19.954601226993866,
|
| 113848 |
+
"grad_norm": 0.2802629768848419,
|
| 113849 |
+
"learning_rate": 7.275404934364849e-10,
|
| 113850 |
+
"loss": 0.6265736222267151,
|
| 113851 |
+
"step": 16263
|
| 113852 |
+
},
|
| 113853 |
+
{
|
| 113854 |
+
"epoch": 19.955828220858894,
|
| 113855 |
+
"grad_norm": 0.24711941182613373,
|
| 113856 |
+
"learning_rate": 6.897528991883784e-10,
|
| 113857 |
+
"loss": 0.6305452585220337,
|
| 113858 |
+
"step": 16264
|
| 113859 |
+
},
|
| 113860 |
+
{
|
| 113861 |
+
"epoch": 19.957055214723926,
|
| 113862 |
+
"grad_norm": 0.2828010320663452,
|
| 113863 |
+
"learning_rate": 6.529729558363284e-10,
|
| 113864 |
+
"loss": 0.5041950941085815,
|
| 113865 |
+
"step": 16265
|
| 113866 |
+
},
|
| 113867 |
+
{
|
| 113868 |
+
"epoch": 19.958282208588958,
|
| 113869 |
+
"grad_norm": 0.26520174741744995,
|
| 113870 |
+
"learning_rate": 6.172006648624828e-10,
|
| 113871 |
+
"loss": 0.7630486488342285,
|
| 113872 |
+
"step": 16266
|
| 113873 |
+
},
|
| 113874 |
+
{
|
| 113875 |
+
"epoch": 19.959509202453987,
|
| 113876 |
+
"grad_norm": 0.22848939895629883,
|
| 113877 |
+
"learning_rate": 5.824360277073559e-10,
|
| 113878 |
+
"loss": 0.39715874195098877,
|
| 113879 |
+
"step": 16267
|
| 113880 |
+
},
|
| 113881 |
+
{
|
| 113882 |
+
"epoch": 19.96073619631902,
|
| 113883 |
+
"grad_norm": 0.25177448987960815,
|
| 113884 |
+
"learning_rate": 5.486790457753799e-10,
|
| 113885 |
+
"loss": 0.4023863971233368,
|
| 113886 |
+
"step": 16268
|
| 113887 |
+
},
|
| 113888 |
+
{
|
| 113889 |
+
"epoch": 19.96196319018405,
|
| 113890 |
+
"grad_norm": 0.29539355635643005,
|
| 113891 |
+
"learning_rate": 5.159297204238023e-10,
|
| 113892 |
+
"loss": 0.5298452377319336,
|
| 113893 |
+
"step": 16269
|
| 113894 |
+
},
|
| 113895 |
+
{
|
| 113896 |
+
"epoch": 19.96319018404908,
|
| 113897 |
+
"grad_norm": 0.2694055438041687,
|
| 113898 |
+
"learning_rate": 4.841880529765641e-10,
|
| 113899 |
+
"loss": 0.45743635296821594,
|
| 113900 |
+
"step": 16270
|
| 113901 |
+
},
|
| 113902 |
+
{
|
| 113903 |
+
"epoch": 19.96441717791411,
|
| 113904 |
+
"grad_norm": 0.25338271260261536,
|
| 113905 |
+
"learning_rate": 4.534540447076463e-10,
|
| 113906 |
+
"loss": 0.5844091176986694,
|
| 113907 |
+
"step": 16271
|
| 113908 |
+
},
|
| 113909 |
+
{
|
| 113910 |
+
"epoch": 19.96564417177914,
|
| 113911 |
+
"grad_norm": 0.2750682234764099,
|
| 113912 |
+
"learning_rate": 4.2372769686049863e-10,
|
| 113913 |
+
"loss": 0.4653807282447815,
|
| 113914 |
+
"step": 16272
|
| 113915 |
+
},
|
| 113916 |
+
{
|
| 113917 |
+
"epoch": 19.96687116564417,
|
| 113918 |
+
"grad_norm": 0.23543284833431244,
|
| 113919 |
+
"learning_rate": 3.9500901063416194e-10,
|
| 113920 |
+
"loss": 0.5624647736549377,
|
| 113921 |
+
"step": 16273
|
| 113922 |
+
},
|
| 113923 |
+
{
|
| 113924 |
+
"epoch": 19.968098159509204,
|
| 113925 |
+
"grad_norm": 0.28273266553878784,
|
| 113926 |
+
"learning_rate": 3.672979871804927e-10,
|
| 113927 |
+
"loss": 0.6415307521820068,
|
| 113928 |
+
"step": 16274
|
| 113929 |
+
},
|
| 113930 |
+
{
|
| 113931 |
+
"epoch": 19.969325153374232,
|
| 113932 |
+
"grad_norm": 0.26589664816856384,
|
| 113933 |
+
"learning_rate": 3.4059462762359164e-10,
|
| 113934 |
+
"loss": 0.629359781742096,
|
| 113935 |
+
"step": 16275
|
| 113936 |
+
},
|
| 113937 |
+
{
|
| 113938 |
+
"epoch": 19.970552147239264,
|
| 113939 |
+
"grad_norm": 0.30133673548698425,
|
| 113940 |
+
"learning_rate": 3.1489893303204843e-10,
|
| 113941 |
+
"loss": 0.6743714213371277,
|
| 113942 |
+
"step": 16276
|
| 113943 |
+
},
|
| 113944 |
+
{
|
| 113945 |
+
"epoch": 19.971779141104296,
|
| 113946 |
+
"grad_norm": 0.24149096012115479,
|
| 113947 |
+
"learning_rate": 2.902109044466972e-10,
|
| 113948 |
+
"loss": 0.5655030012130737,
|
| 113949 |
+
"step": 16277
|
| 113950 |
+
},
|
| 113951 |
+
{
|
| 113952 |
+
"epoch": 19.973006134969324,
|
| 113953 |
+
"grad_norm": 0.2459070384502411,
|
| 113954 |
+
"learning_rate": 2.665305428639631e-10,
|
| 113955 |
+
"loss": 0.5063390731811523,
|
| 113956 |
+
"step": 16278
|
| 113957 |
+
},
|
| 113958 |
+
{
|
| 113959 |
+
"epoch": 19.974233128834356,
|
| 113960 |
+
"grad_norm": 0.27514341473579407,
|
| 113961 |
+
"learning_rate": 2.4385784923308674e-10,
|
| 113962 |
+
"loss": 0.6695590615272522,
|
| 113963 |
+
"step": 16279
|
| 113964 |
+
},
|
| 113965 |
+
{
|
| 113966 |
+
"epoch": 19.975460122699385,
|
| 113967 |
+
"grad_norm": 0.29874908924102783,
|
| 113968 |
+
"learning_rate": 2.2219282447277778e-10,
|
| 113969 |
+
"loss": 0.34940165281295776,
|
| 113970 |
+
"step": 16280
|
| 113971 |
+
},
|
| 113972 |
+
{
|
| 113973 |
+
"epoch": 19.976687116564417,
|
| 113974 |
+
"grad_norm": 0.27747642993927,
|
| 113975 |
+
"learning_rate": 2.015354694517857e-10,
|
| 113976 |
+
"loss": 0.6391558647155762,
|
| 113977 |
+
"step": 16281
|
| 113978 |
+
},
|
| 113979 |
+
{
|
| 113980 |
+
"epoch": 19.97791411042945,
|
| 113981 |
+
"grad_norm": 0.2599896788597107,
|
| 113982 |
+
"learning_rate": 1.8188578500832888e-10,
|
| 113983 |
+
"loss": 0.5198867321014404,
|
| 113984 |
+
"step": 16282
|
| 113985 |
+
},
|
| 113986 |
+
{
|
| 113987 |
+
"epoch": 19.979141104294477,
|
| 113988 |
+
"grad_norm": 0.291477769613266,
|
| 113989 |
+
"learning_rate": 1.6324377193066565e-10,
|
| 113990 |
+
"loss": 0.6670821905136108,
|
| 113991 |
+
"step": 16283
|
| 113992 |
+
},
|
| 113993 |
+
{
|
| 113994 |
+
"epoch": 19.98036809815951,
|
| 113995 |
+
"grad_norm": 0.2764919102191925,
|
| 113996 |
+
"learning_rate": 1.4560943096819658e-10,
|
| 113997 |
+
"loss": 0.5779229402542114,
|
| 113998 |
+
"step": 16284
|
| 113999 |
+
},
|
| 114000 |
+
{
|
| 114001 |
+
"epoch": 19.98159509202454,
|
| 114002 |
+
"grad_norm": 0.2672038972377777,
|
| 114003 |
+
"learning_rate": 1.289827628370155e-10,
|
| 114004 |
+
"loss": 0.5859079957008362,
|
| 114005 |
+
"step": 16285
|
| 114006 |
+
},
|
| 114007 |
+
{
|
| 114008 |
+
"epoch": 19.98282208588957,
|
| 114009 |
+
"grad_norm": 0.25981295108795166,
|
| 114010 |
+
"learning_rate": 1.1336376820325623e-10,
|
| 114011 |
+
"loss": 0.6229197978973389,
|
| 114012 |
+
"step": 16286
|
| 114013 |
+
},
|
| 114014 |
+
{
|
| 114015 |
+
"epoch": 19.9840490797546,
|
| 114016 |
+
"grad_norm": 0.2849527895450592,
|
| 114017 |
+
"learning_rate": 9.87524476997459e-11,
|
| 114018 |
+
"loss": 0.7990140914916992,
|
| 114019 |
+
"step": 16287
|
| 114020 |
+
},
|
| 114021 |
+
{
|
| 114022 |
+
"epoch": 19.98527607361963,
|
| 114023 |
+
"grad_norm": 0.27847468852996826,
|
| 114024 |
+
"learning_rate": 8.514880191212715e-11,
|
| 114025 |
+
"loss": 0.5412949323654175,
|
| 114026 |
+
"step": 16288
|
| 114027 |
+
},
|
| 114028 |
+
{
|
| 114029 |
+
"epoch": 19.986503067484662,
|
| 114030 |
+
"grad_norm": 0.23352737724781036,
|
| 114031 |
+
"learning_rate": 7.255283138996039e-11,
|
| 114032 |
+
"loss": 0.3678314983844757,
|
| 114033 |
+
"step": 16289
|
| 114034 |
+
},
|
| 114035 |
+
{
|
| 114036 |
+
"epoch": 19.987730061349694,
|
| 114037 |
+
"grad_norm": 0.2763413190841675,
|
| 114038 |
+
"learning_rate": 6.096453664117263e-11,
|
| 114039 |
+
"loss": 0.6856487989425659,
|
| 114040 |
+
"step": 16290
|
| 114041 |
+
},
|
| 114042 |
+
{
|
| 114043 |
+
"epoch": 19.988957055214723,
|
| 114044 |
+
"grad_norm": 0.25349316000938416,
|
| 114045 |
+
"learning_rate": 5.038391813483312e-11,
|
| 114046 |
+
"loss": 0.5396376848220825,
|
| 114047 |
+
"step": 16291
|
| 114048 |
+
},
|
| 114049 |
+
{
|
| 114050 |
+
"epoch": 19.990184049079755,
|
| 114051 |
+
"grad_norm": 0.25586146116256714,
|
| 114052 |
+
"learning_rate": 4.0810976292826597e-11,
|
| 114053 |
+
"loss": 0.6023741960525513,
|
| 114054 |
+
"step": 16292
|
| 114055 |
+
},
|
| 114056 |
+
{
|
| 114057 |
+
"epoch": 19.991411042944787,
|
| 114058 |
+
"grad_norm": 0.8770232200622559,
|
| 114059 |
+
"learning_rate": 3.224571150373112e-11,
|
| 114060 |
+
"loss": 0.4764806628227234,
|
| 114061 |
+
"step": 16293
|
| 114062 |
+
},
|
| 114063 |
+
{
|
| 114064 |
+
"epoch": 19.992638036809815,
|
| 114065 |
+
"grad_norm": 0.3072080910205841,
|
| 114066 |
+
"learning_rate": 2.4688124114491397e-11,
|
| 114067 |
+
"loss": 0.6098132133483887,
|
| 114068 |
+
"step": 16294
|
| 114069 |
+
},
|
| 114070 |
+
{
|
| 114071 |
+
"epoch": 19.993865030674847,
|
| 114072 |
+
"grad_norm": 0.2564147114753723,
|
| 114073 |
+
"learning_rate": 1.8138214427643185e-11,
|
| 114074 |
+
"loss": 0.5958019495010376,
|
| 114075 |
+
"step": 16295
|
| 114076 |
+
},
|
| 114077 |
+
{
|
| 114078 |
+
"epoch": 19.99509202453988,
|
| 114079 |
+
"grad_norm": 0.25169652700424194,
|
| 114080 |
+
"learning_rate": 1.2595982706864461e-11,
|
| 114081 |
+
"loss": 0.42747119069099426,
|
| 114082 |
+
"step": 16296
|
| 114083 |
+
},
|
| 114084 |
+
{
|
| 114085 |
+
"epoch": 19.996319018404908,
|
| 114086 |
+
"grad_norm": 0.25083857774734497,
|
| 114087 |
+
"learning_rate": 8.061429176975388e-12,
|
| 114088 |
+
"loss": 0.5245164632797241,
|
| 114089 |
+
"step": 16297
|
| 114090 |
+
},
|
| 114091 |
+
{
|
| 114092 |
+
"epoch": 19.99754601226994,
|
| 114093 |
+
"grad_norm": 0.24573828279972076,
|
| 114094 |
+
"learning_rate": 4.5345540183872046e-12,
|
| 114095 |
+
"loss": 0.4817044734954834,
|
| 114096 |
+
"step": 16298
|
| 114097 |
+
},
|
| 114098 |
+
{
|
| 114099 |
+
"epoch": 19.998773006134968,
|
| 114100 |
+
"grad_norm": 0.2476826310157776,
|
| 114101 |
+
"learning_rate": 2.0153573754289058e-12,
|
| 114102 |
+
"loss": 0.5343716144561768,
|
| 114103 |
+
"step": 16299
|
| 114104 |
+
},
|
| 114105 |
+
{
|
| 114106 |
+
"epoch": 20.0,
|
| 114107 |
+
"grad_norm": 0.27893704175949097,
|
| 114108 |
+
"learning_rate": 5.038393480205628e-13,
|
| 114109 |
+
"loss": 0.3585406243801117,
|
| 114110 |
+
"step": 16300
|
| 114111 |
}
|
| 114112 |
],
|
| 114113 |
"logging_steps": 1,
|
|
|
|
| 114122 |
"should_evaluate": false,
|
| 114123 |
"should_log": false,
|
| 114124 |
"should_save": true,
|
| 114125 |
+
"should_training_stop": true
|
| 114126 |
},
|
| 114127 |
"attributes": {}
|
| 114128 |
}
|
| 114129 |
},
|
| 114130 |
+
"total_flos": 4.559192932598415e+19,
|
| 114131 |
"train_batch_size": 8,
|
| 114132 |
"trial_name": null,
|
| 114133 |
"trial_params": null
|