Training in progress, step 8342, checkpoint
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:03cc5d7b651a639b0220de3a2a1ccacf8b95355b5dcd8c8b028327a0da96fdfb
 size 791869518
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:bed96b488a7dd948ce9646603587fe38f7e77e9afc4e5a26e9aef530b83068ba
 size 2375752250
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:08b735cffdc42abe93b366df558ae724495aca3da952a5c2458609ec9e48fe3c
 size 1000
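The three checkpoint binaries above are tracked through Git LFS, so the repository itself stores only a three-line pointer per file: the LFS spec version, the sha256 OID of the blob, and its size in bytes. This commit swaps in new OIDs while every size stays the same, which is what an in-place overwrite of same-shape tensors looks like at the pointer level. A minimal sketch for verifying a downloaded file against the OID in its pointer, assuming the checkpoint directory has been fetched locally:

import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    # Stream the file in 1 MiB chunks so multi-GB checkpoints never sit in memory.
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# OID copied from the model.safetensors pointer above; the local path is an assumption.
expected = "03cc5d7b651a639b0220de3a2a1ccacf8b95355b5dcd8c8b028327a0da96fdfb"
actual = sha256_of("last-checkpoint/model.safetensors")
assert actual == expected, f"checksum mismatch: {actual}"

The same check applies to optimizer.pt and scheduler.pt against their OIDs above.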
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.9999700320656897,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 8342,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -11335,6 +11335,482 @@
       "eval_samples_per_second": 1116.87,
       "eval_steps_per_second": 34.904,
       "step": 8000
+    },
+    {
+      "epoch": 0.9595732566154215,
+      "grad_norm": 102.375,
+      "learning_rate": 8.978286932196616e-08,
+      "loss": 64.3574,
+      "step": 8005
+    },
+    {
+      "epoch": 0.9601726153016272,
+      "grad_norm": 113.8125,
+      "learning_rate": 8.845077927267883e-08,
+      "loss": 62.3656,
+      "step": 8010
+    },
+    {
+      "epoch": 0.960771973987833,
+      "grad_norm": 106.625,
+      "learning_rate": 8.711868922339149e-08,
+      "loss": 64.0395,
+      "step": 8015
+    },
+    {
+      "epoch": 0.9613713326740387,
+      "grad_norm": 108.875,
+      "learning_rate": 8.578659917410417e-08,
+      "loss": 65.0463,
+      "step": 8020
+    },
+    {
+      "epoch": 0.9619706913602445,
+      "grad_norm": 106.0625,
+      "learning_rate": 8.445450912481683e-08,
+      "loss": 63.7409,
+      "step": 8025
+    },
+    {
+      "epoch": 0.9625700500464502,
+      "grad_norm": 108.375,
+      "learning_rate": 8.31224190755295e-08,
+      "loss": 63.3834,
+      "step": 8030
+    },
+    {
+      "epoch": 0.9631694087326561,
+      "grad_norm": 108.125,
+      "learning_rate": 8.179032902624216e-08,
+      "loss": 63.9561,
+      "step": 8035
+    },
+    {
+      "epoch": 0.9637687674188619,
+      "grad_norm": 107.5,
+      "learning_rate": 8.045823897695484e-08,
+      "loss": 61.5704,
+      "step": 8040
+    },
+    {
+      "epoch": 0.9643681261050676,
+      "grad_norm": 107.375,
+      "learning_rate": 7.91261489276675e-08,
+      "loss": 63.2019,
+      "step": 8045
+    },
+    {
+      "epoch": 0.9649674847912734,
+      "grad_norm": 105.375,
+      "learning_rate": 7.779405887838017e-08,
+      "loss": 64.2461,
+      "step": 8050
+    },
+    {
+      "epoch": 0.9655668434774791,
+      "grad_norm": 108.5,
+      "learning_rate": 7.646196882909283e-08,
+      "loss": 63.3329,
+      "step": 8055
+    },
+    {
+      "epoch": 0.9661662021636849,
+      "grad_norm": 108.625,
+      "learning_rate": 7.512987877980551e-08,
+      "loss": 64.4003,
+      "step": 8060
+    },
+    {
+      "epoch": 0.9667655608498906,
+      "grad_norm": 112.0625,
+      "learning_rate": 7.379778873051817e-08,
+      "loss": 63.6081,
+      "step": 8065
+    },
+    {
+      "epoch": 0.9673649195360964,
+      "grad_norm": 107.9375,
+      "learning_rate": 7.246569868123084e-08,
+      "loss": 63.1161,
+      "step": 8070
+    },
+    {
+      "epoch": 0.9679642782223021,
+      "grad_norm": 109.0,
+      "learning_rate": 7.113360863194353e-08,
+      "loss": 64.4234,
+      "step": 8075
+    },
+    {
+      "epoch": 0.9685636369085079,
+      "grad_norm": 102.6875,
+      "learning_rate": 6.98015185826562e-08,
+      "loss": 63.9451,
+      "step": 8080
+    },
+    {
+      "epoch": 0.9691629955947136,
+      "grad_norm": 106.1875,
+      "learning_rate": 6.846942853336886e-08,
+      "loss": 63.7961,
+      "step": 8085
+    },
+    {
+      "epoch": 0.9697623542809194,
+      "grad_norm": 107.0,
+      "learning_rate": 6.713733848408152e-08,
+      "loss": 62.8793,
+      "step": 8090
+    },
+    {
+      "epoch": 0.9703617129671251,
+      "grad_norm": 107.375,
+      "learning_rate": 6.58052484347942e-08,
+      "loss": 63.4959,
+      "step": 8095
+    },
+    {
+      "epoch": 0.9709610716533309,
+      "grad_norm": 103.375,
+      "learning_rate": 6.447315838550686e-08,
+      "loss": 62.9931,
+      "step": 8100
+    },
+    {
+      "epoch": 0.9715604303395367,
+      "grad_norm": 108.25,
+      "learning_rate": 6.314106833621953e-08,
+      "loss": 63.7424,
+      "step": 8105
+    },
+    {
+      "epoch": 0.9721597890257424,
+      "grad_norm": 111.4375,
+      "learning_rate": 6.180897828693219e-08,
+      "loss": 64.0168,
+      "step": 8110
+    },
+    {
+      "epoch": 0.9727591477119483,
+      "grad_norm": 109.8125,
+      "learning_rate": 6.047688823764486e-08,
+      "loss": 63.7016,
+      "step": 8115
+    },
+    {
+      "epoch": 0.973358506398154,
+      "grad_norm": 106.125,
+      "learning_rate": 5.9144798188357535e-08,
+      "loss": 62.0944,
+      "step": 8120
+    },
+    {
+      "epoch": 0.9739578650843598,
+      "grad_norm": 109.0,
+      "learning_rate": 5.78127081390702e-08,
+      "loss": 62.8097,
+      "step": 8125
+    },
+    {
+      "epoch": 0.9745572237705655,
+      "grad_norm": 106.6875,
+      "learning_rate": 5.648061808978287e-08,
+      "loss": 62.1002,
+      "step": 8130
+    },
+    {
+      "epoch": 0.9751565824567713,
+      "grad_norm": 108.3125,
+      "learning_rate": 5.5148528040495535e-08,
+      "loss": 62.1948,
+      "step": 8135
+    },
+    {
+      "epoch": 0.975755941142977,
+      "grad_norm": 107.1875,
+      "learning_rate": 5.3816437991208206e-08,
+      "loss": 62.8547,
+      "step": 8140
+    },
+    {
+      "epoch": 0.9763552998291828,
+      "grad_norm": 113.3125,
+      "learning_rate": 5.248434794192087e-08,
+      "loss": 64.7491,
+      "step": 8145
+    },
+    {
+      "epoch": 0.9769546585153885,
+      "grad_norm": 111.25,
+      "learning_rate": 5.115225789263354e-08,
+      "loss": 63.4233,
+      "step": 8150
+    },
+    {
+      "epoch": 0.9775540172015943,
+      "grad_norm": 108.375,
+      "learning_rate": 4.9820167843346206e-08,
+      "loss": 64.265,
+      "step": 8155
+    },
+    {
+      "epoch": 0.9781533758878,
+      "grad_norm": 107.75,
+      "learning_rate": 4.8488077794058877e-08,
+      "loss": 63.7708,
+      "step": 8160
+    },
+    {
+      "epoch": 0.9787527345740058,
+      "grad_norm": 107.8125,
+      "learning_rate": 4.715598774477154e-08,
+      "loss": 63.6366,
+      "step": 8165
+    },
+    {
+      "epoch": 0.9793520932602116,
+      "grad_norm": 108.5625,
+      "learning_rate": 4.582389769548421e-08,
+      "loss": 63.4673,
+      "step": 8170
+    },
+    {
+      "epoch": 0.9799514519464173,
+      "grad_norm": 109.8125,
+      "learning_rate": 4.449180764619688e-08,
+      "loss": 63.0172,
+      "step": 8175
+    },
+    {
+      "epoch": 0.9805508106326231,
+      "grad_norm": 111.25,
+      "learning_rate": 4.315971759690955e-08,
+      "loss": 64.0092,
+      "step": 8180
+    },
+    {
+      "epoch": 0.9811501693188288,
+      "grad_norm": 107.75,
+      "learning_rate": 4.182762754762222e-08,
+      "loss": 63.3634,
+      "step": 8185
+    },
+    {
+      "epoch": 0.9817495280050346,
+      "grad_norm": 105.375,
+      "learning_rate": 4.049553749833488e-08,
+      "loss": 62.8124,
+      "step": 8190
+    },
+    {
+      "epoch": 0.9823488866912403,
+      "grad_norm": 106.5625,
+      "learning_rate": 3.9163447449047554e-08,
+      "loss": 63.596,
+      "step": 8195
+    },
+    {
+      "epoch": 0.9829482453774462,
+      "grad_norm": 108.625,
+      "learning_rate": 3.783135739976022e-08,
+      "loss": 63.2134,
+      "step": 8200
+    },
+    {
+      "epoch": 0.9835476040636519,
+      "grad_norm": 110.0,
+      "learning_rate": 3.649926735047289e-08,
+      "loss": 63.8696,
+      "step": 8205
+    },
+    {
+      "epoch": 0.9841469627498577,
+      "grad_norm": 108.5,
+      "learning_rate": 3.516717730118556e-08,
+      "loss": 63.2911,
+      "step": 8210
+    },
+    {
+      "epoch": 0.9847463214360634,
+      "grad_norm": 109.3125,
+      "learning_rate": 3.383508725189823e-08,
+      "loss": 64.3479,
+      "step": 8215
+    },
+    {
+      "epoch": 0.9853456801222692,
+      "grad_norm": 104.3125,
+      "learning_rate": 3.2502997202610895e-08,
+      "loss": 63.6428,
+      "step": 8220
+    },
+    {
+      "epoch": 0.985945038808475,
+      "grad_norm": 105.625,
+      "learning_rate": 3.117090715332356e-08,
+      "loss": 63.695,
+      "step": 8225
+    },
+    {
+      "epoch": 0.9865443974946807,
+      "grad_norm": 107.5625,
+      "learning_rate": 2.983881710403623e-08,
+      "loss": 63.5868,
+      "step": 8230
+    },
+    {
+      "epoch": 0.9871437561808865,
+      "grad_norm": 106.8125,
+      "learning_rate": 2.8506727054748902e-08,
+      "loss": 62.9535,
+      "step": 8235
+    },
+    {
+      "epoch": 0.9877431148670922,
+      "grad_norm": 112.1875,
+      "learning_rate": 2.717463700546157e-08,
+      "loss": 63.9218,
+      "step": 8240
+    },
+    {
+      "epoch": 0.988342473553298,
+      "grad_norm": 110.9375,
+      "learning_rate": 2.5842546956174237e-08,
+      "loss": 63.0742,
+      "step": 8245
+    },
+    {
+      "epoch": 0.9889418322395037,
+      "grad_norm": 109.375,
+      "learning_rate": 2.4510456906886905e-08,
+      "loss": 63.139,
+      "step": 8250
+    },
+    {
+      "epoch": 0.9895411909257095,
+      "grad_norm": 106.875,
+      "learning_rate": 2.3178366857599572e-08,
+      "loss": 63.3816,
+      "step": 8255
+    },
+    {
+      "epoch": 0.9901405496119152,
+      "grad_norm": 108.8125,
+      "learning_rate": 2.184627680831224e-08,
+      "loss": 62.7286,
+      "step": 8260
+    },
+    {
+      "epoch": 0.990739908298121,
+      "grad_norm": 108.875,
+      "learning_rate": 2.0514186759024908e-08,
+      "loss": 64.1767,
+      "step": 8265
+    },
+    {
+      "epoch": 0.9913392669843267,
+      "grad_norm": 107.3125,
+      "learning_rate": 1.9182096709737575e-08,
+      "loss": 63.8556,
+      "step": 8270
+    },
+    {
+      "epoch": 0.9919386256705325,
+      "grad_norm": 105.875,
+      "learning_rate": 1.7850006660450246e-08,
+      "loss": 63.7212,
+      "step": 8275
+    },
+    {
+      "epoch": 0.9925379843567383,
+      "grad_norm": 108.375,
+      "learning_rate": 1.6517916611162914e-08,
+      "loss": 63.7732,
+      "step": 8280
+    },
+    {
+      "epoch": 0.9931373430429441,
+      "grad_norm": 113.875,
+      "learning_rate": 1.5185826561875582e-08,
+      "loss": 64.1175,
+      "step": 8285
+    },
+    {
+      "epoch": 0.9937367017291499,
+      "grad_norm": 106.25,
+      "learning_rate": 1.3853736512588251e-08,
+      "loss": 63.2011,
+      "step": 8290
+    },
+    {
+      "epoch": 0.9943360604153556,
+      "grad_norm": 105.5625,
+      "learning_rate": 1.2521646463300919e-08,
+      "loss": 63.6572,
+      "step": 8295
+    },
+    {
+      "epoch": 0.9949354191015614,
+      "grad_norm": 104.5625,
+      "learning_rate": 1.1189556414013587e-08,
+      "loss": 64.0572,
+      "step": 8300
+    },
+    {
+      "epoch": 0.9955347777877671,
+      "grad_norm": 111.0625,
+      "learning_rate": 9.857466364726254e-09,
+      "loss": 62.2712,
+      "step": 8305
+    },
+    {
+      "epoch": 0.9961341364739729,
+      "grad_norm": 109.1875,
+      "learning_rate": 8.525376315438924e-09,
+      "loss": 64.1278,
+      "step": 8310
+    },
+    {
+      "epoch": 0.9967334951601786,
+      "grad_norm": 103.9375,
+      "learning_rate": 7.193286266151592e-09,
+      "loss": 63.2825,
+      "step": 8315
+    },
+    {
+      "epoch": 0.9973328538463844,
+      "grad_norm": 108.3125,
+      "learning_rate": 5.86119621686426e-09,
+      "loss": 63.4902,
+      "step": 8320
+    },
+    {
+      "epoch": 0.9979322125325901,
+      "grad_norm": 110.0,
+      "learning_rate": 4.529106167576927e-09,
+      "loss": 63.8995,
+      "step": 8325
+    },
+    {
+      "epoch": 0.9985315712187959,
+      "grad_norm": 108.3125,
+      "learning_rate": 3.1970161182895963e-09,
+      "loss": 63.5888,
+      "step": 8330
+    },
+    {
+      "epoch": 0.9991309299050016,
+      "grad_norm": 103.75,
+      "learning_rate": 1.8649260690022644e-09,
+      "loss": 62.5743,
+      "step": 8335
+    },
+    {
+      "epoch": 0.9997302885912074,
+      "grad_norm": 107.0,
+      "learning_rate": 5.328360197149327e-10,
+      "loss": 62.6706,
+      "step": 8340
     }
   ],
   "logging_steps": 5,
@@ -11349,12 +11825,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop":
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.
+  "total_flos": 3.614053037573669e+19,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null
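trainer_state.json is the Trainer's bookkeeping file for the run. This commit records the closing state (epoch 0.9999700320656897 at global_step 8342, "should_training_stop": true) and appends log_history entries for steps 8005 through 8340, over which the learning rate decays from about 9.0e-08 to 5.3e-10 while the loss hovers in the 62-65 range. A minimal sketch for reading the appended loss curve back out of the checkpoint, assuming the directory has been downloaded locally:

import json

# The local path is an assumption; point it at the downloaded checkpoint directory.
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print(state["epoch"], state["global_step"])  # 0.9999700320656897 8342

# Optimizer-step entries carry "loss"; evaluation entries carry "eval_*" keys instead.
train_logs = [e for e in state["log_history"] if "loss" in e]
for entry in train_logs[-5:]:
    print(entry["step"], entry["loss"], entry["learning_rate"])

Passing the same directory to Trainer.train(resume_from_checkpoint=...) restores optimizer.pt, scheduler.pt, and this state; with "should_training_stop" already true, the run is effectively finished rather than resumable.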