Add files using upload-large-folder tool
Browse files- checkpoint-4410.txt +0 -0
- global_step4403/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- latest +1 -1
- model-00001-of-00004.safetensors +1 -1
- model-00002-of-00004.safetensors +1 -1
- model-00003-of-00004.safetensors +1 -1
- model-00004-of-00004.safetensors +1 -1
- rng_state_0.pth +1 -1
- rng_state_1.pth +1 -1
- rng_state_10.pth +1 -1
- rng_state_11.pth +1 -1
- rng_state_12.pth +1 -1
- rng_state_13.pth +1 -1
- rng_state_14.pth +1 -1
- rng_state_15.pth +1 -1
- rng_state_2.pth +1 -1
- rng_state_3.pth +1 -1
- rng_state_4.pth +1 -1
- rng_state_5.pth +1 -1
- rng_state_6.pth +1 -1
- rng_state_7.pth +1 -1
- rng_state_8.pth +1 -1
- rng_state_9.pth +1 -1
- scheduler.pt +1 -1
- trainer_state.json +200 -4
checkpoint-4410.txt
ADDED
|
File without changes
|
global_step4403/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:17a6a648da445af15cd5bea321fc11a5c23fdd269644dbce9ca75f1656dc2830
|
| 3 |
+
size 3760226304
|
latest
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
global_step4403
|
model-00001-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4968243304
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:806858e608db4065644f4bd3251d595f67c672bfe354e29d0a3c63a5315f59fb
|
| 3 |
size 4968243304
|
model-00002-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4991495816
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bb2d943f5670b6d38e1105bcd1b47158bacb3676741c7fb1a6ead2c9bd91f046
|
| 3 |
size 4991495816
|
model-00003-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4932751040
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5567189897093f0418f25ab4c3dd52a386cdb413f1c3a5ce228732fd7bd2a851
|
| 3 |
size 4932751040
|
model-00004-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1691924384
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:942ed51be07c0f5e4cbe7a38ce3ae081879e5b33d6b626bc70f69e8b38b2fec0
|
| 3 |
size 1691924384
|
rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fb1165242405b17b3d6a8186ae61b13dcb1faa5a54320bebd74ef8d71b964bf7
|
| 3 |
size 15984
|
rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:562c262916c9997ec644c42fed9655ab28706b74fca20290ca921c4761d6a4b0
|
| 3 |
size 15984
|
rng_state_10.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15997
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f6fd4911141e2842a3d884ce4d2109eaeeedefd435098cadf9b564c65967a6eb
|
| 3 |
size 15997
|
rng_state_11.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15997
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8ba6ece259230c4e521fc019ff78563c612add590722906bc4725a8f02ad5376
|
| 3 |
size 15997
|
rng_state_12.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15997
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f1bd528dc10399e3df15ae65f3098a87d293e8d4777932e1875d6dadae5f2760
|
| 3 |
size 15997
|
rng_state_13.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15997
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8ddc21f5027e4c32e3cbc9daa1863acde639199470b640785b2bf069b1758850
|
| 3 |
size 15997
|
rng_state_14.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15997
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:15c7d46874297ff7a0b9cc4607aac3d077700d8f60db95b27325fad44d0ed974
|
| 3 |
size 15997
|
rng_state_15.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15997
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6a20510fa92135806b278f3c0f3a327dcede9674b4eb3de8360d7643b2a73f1a
|
| 3 |
size 15997
|
rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a8d40f8118f513299624ded0a9bcf09778b961635615090409394d4f96f928f6
|
| 3 |
size 15984
|
rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b4391f924238a4cb855c4cbdc6d1a14954f785431c75997d05c7a4ee6615dae7
|
| 3 |
size 15984
|
rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:be7b19bb9543a16bf9f4cd96466ac581436f63070f5815f3a7ba57980608994f
|
| 3 |
size 15984
|
rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:97da4a1ede0a3e0f96411cacd5bfdf84d9355198f7aadc9bcb8be41122043f63
|
| 3 |
size 15984
|
rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:544cb6421b975bd5d2b2360a4e666003794e6197ae654d2ad963cd6572a86ede
|
| 3 |
size 15984
|
rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f8d6eb32a23f3bef6262bbcb2eda724b2fd6f5e579969aa27c71a5971331722b
|
| 3 |
size 15984
|
rng_state_8.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:29dee3c545638083bb257f6e165d367893ac8d5dbb07203c355bd5914e9e47a2
|
| 3 |
size 15984
|
rng_state_9.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6805b7c016228fa319f57f490b2d47f3b38a21aecf78e7cb6654a92340347d3b
|
| 3 |
size 15984
|
scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c89398e01b5829487b6d28ed0235d051c0f71e779ce1d7835c279e8dbddad4e7
|
| 3 |
size 1064
|
trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -427,6 +427,202 @@
|
|
| 427 |
"learning_rate": 6.394557823129253e-06,
|
| 428 |
"loss": 0.0002,
|
| 429 |
"step": 3000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 430 |
}
|
| 431 |
],
|
| 432 |
"logging_steps": 50,
|
|
@@ -441,12 +637,12 @@
|
|
| 441 |
"should_evaluate": false,
|
| 442 |
"should_log": false,
|
| 443 |
"should_save": true,
|
| 444 |
-
"should_training_stop":
|
| 445 |
},
|
| 446 |
"attributes": {}
|
| 447 |
}
|
| 448 |
},
|
| 449 |
-
"total_flos":
|
| 450 |
"train_batch_size": 4,
|
| 451 |
"trial_name": null,
|
| 452 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 9.979036827195468,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 4410,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 427 |
"learning_rate": 6.394557823129253e-06,
|
| 428 |
"loss": 0.0002,
|
| 429 |
"step": 3000
|
| 430 |
+
},
|
| 431 |
+
{
|
| 432 |
+
"epoch": 6.9019830028328615,
|
| 433 |
+
"grad_norm": 0.014847405573922268,
|
| 434 |
+
"learning_rate": 6.16780045351474e-06,
|
| 435 |
+
"loss": 0.0002,
|
| 436 |
+
"step": 3050
|
| 437 |
+
},
|
| 438 |
+
{
|
| 439 |
+
"epoch": 7.013597733711048,
|
| 440 |
+
"grad_norm": 0.018305480145040905,
|
| 441 |
+
"learning_rate": 5.9410430839002275e-06,
|
| 442 |
+
"loss": 0.0002,
|
| 443 |
+
"step": 3100
|
| 444 |
+
},
|
| 445 |
+
{
|
| 446 |
+
"epoch": 7.126912181303116,
|
| 447 |
+
"grad_norm": 0.012819862819578573,
|
| 448 |
+
"learning_rate": 5.7142857142857145e-06,
|
| 449 |
+
"loss": 0.0001,
|
| 450 |
+
"step": 3150
|
| 451 |
+
},
|
| 452 |
+
{
|
| 453 |
+
"epoch": 7.240226628895184,
|
| 454 |
+
"grad_norm": 0.013371528432039266,
|
| 455 |
+
"learning_rate": 5.487528344671202e-06,
|
| 456 |
+
"loss": 0.0001,
|
| 457 |
+
"step": 3200
|
| 458 |
+
},
|
| 459 |
+
{
|
| 460 |
+
"epoch": 7.353541076487252,
|
| 461 |
+
"grad_norm": 0.005633164169599245,
|
| 462 |
+
"learning_rate": 5.260770975056689e-06,
|
| 463 |
+
"loss": 0.0001,
|
| 464 |
+
"step": 3250
|
| 465 |
+
},
|
| 466 |
+
{
|
| 467 |
+
"epoch": 7.46685552407932,
|
| 468 |
+
"grad_norm": 0.01061442292135964,
|
| 469 |
+
"learning_rate": 5.034013605442177e-06,
|
| 470 |
+
"loss": 0.0001,
|
| 471 |
+
"step": 3300
|
| 472 |
+
},
|
| 473 |
+
{
|
| 474 |
+
"epoch": 7.580169971671388,
|
| 475 |
+
"grad_norm": 0.00907172882592498,
|
| 476 |
+
"learning_rate": 4.807256235827665e-06,
|
| 477 |
+
"loss": 0.0001,
|
| 478 |
+
"step": 3350
|
| 479 |
+
},
|
| 480 |
+
{
|
| 481 |
+
"epoch": 7.693484419263456,
|
| 482 |
+
"grad_norm": 0.010227726380358893,
|
| 483 |
+
"learning_rate": 4.580498866213152e-06,
|
| 484 |
+
"loss": 0.0001,
|
| 485 |
+
"step": 3400
|
| 486 |
+
},
|
| 487 |
+
{
|
| 488 |
+
"epoch": 7.806798866855524,
|
| 489 |
+
"grad_norm": 0.008230239860332859,
|
| 490 |
+
"learning_rate": 4.35374149659864e-06,
|
| 491 |
+
"loss": 0.0001,
|
| 492 |
+
"step": 3450
|
| 493 |
+
},
|
| 494 |
+
{
|
| 495 |
+
"epoch": 7.920113314447592,
|
| 496 |
+
"grad_norm": 0.013256768674645692,
|
| 497 |
+
"learning_rate": 4.126984126984127e-06,
|
| 498 |
+
"loss": 0.0001,
|
| 499 |
+
"step": 3500
|
| 500 |
+
},
|
| 501 |
+
{
|
| 502 |
+
"epoch": 8.03172804532578,
|
| 503 |
+
"grad_norm": 0.01006097891003743,
|
| 504 |
+
"learning_rate": 3.9002267573696154e-06,
|
| 505 |
+
"loss": 0.0001,
|
| 506 |
+
"step": 3550
|
| 507 |
+
},
|
| 508 |
+
{
|
| 509 |
+
"epoch": 8.145042492917847,
|
| 510 |
+
"grad_norm": 0.009602466515273548,
|
| 511 |
+
"learning_rate": 3.6734693877551024e-06,
|
| 512 |
+
"loss": 0.0001,
|
| 513 |
+
"step": 3600
|
| 514 |
+
},
|
| 515 |
+
{
|
| 516 |
+
"epoch": 8.258356940509914,
|
| 517 |
+
"grad_norm": 0.005795654820959573,
|
| 518 |
+
"learning_rate": 3.44671201814059e-06,
|
| 519 |
+
"loss": 0.0001,
|
| 520 |
+
"step": 3650
|
| 521 |
+
},
|
| 522 |
+
{
|
| 523 |
+
"epoch": 8.371671388101984,
|
| 524 |
+
"grad_norm": 0.0034753960503702283,
|
| 525 |
+
"learning_rate": 3.2199546485260772e-06,
|
| 526 |
+
"loss": 0.0001,
|
| 527 |
+
"step": 3700
|
| 528 |
+
},
|
| 529 |
+
{
|
| 530 |
+
"epoch": 8.48498583569405,
|
| 531 |
+
"grad_norm": 0.006716712372637811,
|
| 532 |
+
"learning_rate": 2.993197278911565e-06,
|
| 533 |
+
"loss": 0.0001,
|
| 534 |
+
"step": 3750
|
| 535 |
+
},
|
| 536 |
+
{
|
| 537 |
+
"epoch": 8.598300283286118,
|
| 538 |
+
"grad_norm": 0.005691017397956627,
|
| 539 |
+
"learning_rate": 2.7664399092970525e-06,
|
| 540 |
+
"loss": 0.0001,
|
| 541 |
+
"step": 3800
|
| 542 |
+
},
|
| 543 |
+
{
|
| 544 |
+
"epoch": 8.711614730878187,
|
| 545 |
+
"grad_norm": 0.005512521725963195,
|
| 546 |
+
"learning_rate": 2.53968253968254e-06,
|
| 547 |
+
"loss": 0.0001,
|
| 548 |
+
"step": 3850
|
| 549 |
+
},
|
| 550 |
+
{
|
| 551 |
+
"epoch": 8.824929178470255,
|
| 552 |
+
"grad_norm": 0.004574835289146862,
|
| 553 |
+
"learning_rate": 2.3129251700680273e-06,
|
| 554 |
+
"loss": 0.0001,
|
| 555 |
+
"step": 3900
|
| 556 |
+
},
|
| 557 |
+
{
|
| 558 |
+
"epoch": 8.938243626062324,
|
| 559 |
+
"grad_norm": 0.005810520530240234,
|
| 560 |
+
"learning_rate": 2.086167800453515e-06,
|
| 561 |
+
"loss": 0.0001,
|
| 562 |
+
"step": 3950
|
| 563 |
+
},
|
| 564 |
+
{
|
| 565 |
+
"epoch": 9.04985835694051,
|
| 566 |
+
"grad_norm": 0.0032799015697558206,
|
| 567 |
+
"learning_rate": 1.8594104308390023e-06,
|
| 568 |
+
"loss": 0.0001,
|
| 569 |
+
"step": 4000
|
| 570 |
+
},
|
| 571 |
+
{
|
| 572 |
+
"epoch": 9.163172804532579,
|
| 573 |
+
"grad_norm": 0.0021797625948262924,
|
| 574 |
+
"learning_rate": 1.6326530612244897e-06,
|
| 575 |
+
"loss": 0.0,
|
| 576 |
+
"step": 4050
|
| 577 |
+
},
|
| 578 |
+
{
|
| 579 |
+
"epoch": 9.276487252124646,
|
| 580 |
+
"grad_norm": 0.003847390080167819,
|
| 581 |
+
"learning_rate": 1.4058956916099775e-06,
|
| 582 |
+
"loss": 0.0,
|
| 583 |
+
"step": 4100
|
| 584 |
+
},
|
| 585 |
+
{
|
| 586 |
+
"epoch": 9.389801699716713,
|
| 587 |
+
"grad_norm": 0.0025440319025627875,
|
| 588 |
+
"learning_rate": 1.179138321995465e-06,
|
| 589 |
+
"loss": 0.0,
|
| 590 |
+
"step": 4150
|
| 591 |
+
},
|
| 592 |
+
{
|
| 593 |
+
"epoch": 9.503116147308782,
|
| 594 |
+
"grad_norm": 0.002651261609298388,
|
| 595 |
+
"learning_rate": 9.523809523809525e-07,
|
| 596 |
+
"loss": 0.0,
|
| 597 |
+
"step": 4200
|
| 598 |
+
},
|
| 599 |
+
{
|
| 600 |
+
"epoch": 9.61643059490085,
|
| 601 |
+
"grad_norm": 0.0033509868369888817,
|
| 602 |
+
"learning_rate": 7.2562358276644e-07,
|
| 603 |
+
"loss": 0.0,
|
| 604 |
+
"step": 4250
|
| 605 |
+
},
|
| 606 |
+
{
|
| 607 |
+
"epoch": 9.729745042492917,
|
| 608 |
+
"grad_norm": 0.011727033108113469,
|
| 609 |
+
"learning_rate": 4.988662131519275e-07,
|
| 610 |
+
"loss": 0.0,
|
| 611 |
+
"step": 4300
|
| 612 |
+
},
|
| 613 |
+
{
|
| 614 |
+
"epoch": 9.843059490084986,
|
| 615 |
+
"grad_norm": 0.005342769601421854,
|
| 616 |
+
"learning_rate": 2.72108843537415e-07,
|
| 617 |
+
"loss": 0.0,
|
| 618 |
+
"step": 4350
|
| 619 |
+
},
|
| 620 |
+
{
|
| 621 |
+
"epoch": 9.956373937677053,
|
| 622 |
+
"grad_norm": 0.004383322417463679,
|
| 623 |
+
"learning_rate": 4.53514739229025e-08,
|
| 624 |
+
"loss": 0.0,
|
| 625 |
+
"step": 4400
|
| 626 |
}
|
| 627 |
],
|
| 628 |
"logging_steps": 50,
|
|
|
|
| 637 |
"should_evaluate": false,
|
| 638 |
"should_log": false,
|
| 639 |
"should_save": true,
|
| 640 |
+
"should_training_stop": true
|
| 641 |
},
|
| 642 |
"attributes": {}
|
| 643 |
}
|
| 644 |
},
|
| 645 |
+
"total_flos": 2.446988850023629e+16,
|
| 646 |
"train_batch_size": 4,
|
| 647 |
"trial_name": null,
|
| 648 |
"trial_params": null
|