Training in progress, step 74000, checkpoint
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +353 -3
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 304481530
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5c14f92422cc30c9605f95654d62c250bad463581bd3da10bb7b17093206005e
|
| 3 |
size 304481530
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 402029570
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cdbe93c9686a0a02ecdcba702915ad1389c2bb261f4103c48b737864febba412
|
| 3 |
size 402029570
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cf863b0b895309e73d9088642dd8d00845be8fee481352073f05fd0bd67029a2
|
| 3 |
size 14960
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7f0942e1e9569ddb210dcd2d42bc92e339bbd2239990fd3cc546265bee775d39
|
| 3 |
size 14960
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d2a7d2488bf1d4b76628b506fc6b6fb862cbf4396985e4c9e2f16e4262ba5085
|
| 3 |
size 14960
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:115e6df582159f803bd87cdfeee2a6c991779cf09357b4ef2537b502b04c878f
|
| 3 |
size 14960
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9391a0b437930e5697a6d0905f7bf157b3a70a9ca0d6fddfd220757077049906
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -25558,6 +25558,356 @@
|
|
| 25558 |
"learning_rate": 0.000482096607544322,
|
| 25559 |
"loss": 16.4235,
|
| 25560 |
"step": 73000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25561 |
}
|
| 25562 |
],
|
| 25563 |
"logging_steps": 20,
|
|
@@ -25577,7 +25927,7 @@
|
|
| 25577 |
"attributes": {}
|
| 25578 |
}
|
| 25579 |
},
|
| 25580 |
-
"total_flos": 5.
|
| 25581 |
"train_batch_size": 48,
|
| 25582 |
"trial_name": null,
|
| 25583 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.1096172875350331,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 74000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 25558 |
"learning_rate": 0.000482096607544322,
|
| 25559 |
"loss": 16.4235,
|
| 25560 |
"step": 73000
|
| 25561 |
+
},
|
| 25562 |
+
{
|
| 25563 |
+
"epoch": 0.10816559913254212,
|
| 25564 |
+
"grad_norm": 7.0625,
|
| 25565 |
+
"learning_rate": 0.0004820916686093145,
|
| 25566 |
+
"loss": 16.4474,
|
| 25567 |
+
"step": 73020
|
| 25568 |
+
},
|
| 25569 |
+
{
|
| 25570 |
+
"epoch": 0.1081952254264705,
|
| 25571 |
+
"grad_norm": 6.75,
|
| 25572 |
+
"learning_rate": 0.0004820867296743069,
|
| 25573 |
+
"loss": 16.4285,
|
| 25574 |
+
"step": 73040
|
| 25575 |
+
},
|
| 25576 |
+
{
|
| 25577 |
+
"epoch": 0.10822485172039889,
|
| 25578 |
+
"grad_norm": 6.40625,
|
| 25579 |
+
"learning_rate": 0.0004820817907392993,
|
| 25580 |
+
"loss": 16.5063,
|
| 25581 |
+
"step": 73060
|
| 25582 |
+
},
|
| 25583 |
+
{
|
| 25584 |
+
"epoch": 0.10825447801432728,
|
| 25585 |
+
"grad_norm": 6.3125,
|
| 25586 |
+
"learning_rate": 0.0004820768518042917,
|
| 25587 |
+
"loss": 16.421,
|
| 25588 |
+
"step": 73080
|
| 25589 |
+
},
|
| 25590 |
+
{
|
| 25591 |
+
"epoch": 0.10828410430825566,
|
| 25592 |
+
"grad_norm": 6.78125,
|
| 25593 |
+
"learning_rate": 0.0004820719128692842,
|
| 25594 |
+
"loss": 16.5147,
|
| 25595 |
+
"step": 73100
|
| 25596 |
+
},
|
| 25597 |
+
{
|
| 25598 |
+
"epoch": 0.10831373060218405,
|
| 25599 |
+
"grad_norm": 7.125,
|
| 25600 |
+
"learning_rate": 0.0004820669739342766,
|
| 25601 |
+
"loss": 16.4131,
|
| 25602 |
+
"step": 73120
|
| 25603 |
+
},
|
| 25604 |
+
{
|
| 25605 |
+
"epoch": 0.10834335689611244,
|
| 25606 |
+
"grad_norm": 6.375,
|
| 25607 |
+
"learning_rate": 0.00048206203499926906,
|
| 25608 |
+
"loss": 16.3763,
|
| 25609 |
+
"step": 73140
|
| 25610 |
+
},
|
| 25611 |
+
{
|
| 25612 |
+
"epoch": 0.10837298319004082,
|
| 25613 |
+
"grad_norm": 6.75,
|
| 25614 |
+
"learning_rate": 0.00048205709606426145,
|
| 25615 |
+
"loss": 16.402,
|
| 25616 |
+
"step": 73160
|
| 25617 |
+
},
|
| 25618 |
+
{
|
| 25619 |
+
"epoch": 0.10840260948396921,
|
| 25620 |
+
"grad_norm": 6.90625,
|
| 25621 |
+
"learning_rate": 0.00048205215712925396,
|
| 25622 |
+
"loss": 16.4398,
|
| 25623 |
+
"step": 73180
|
| 25624 |
+
},
|
| 25625 |
+
{
|
| 25626 |
+
"epoch": 0.1084322357778976,
|
| 25627 |
+
"grad_norm": 7.34375,
|
| 25628 |
+
"learning_rate": 0.00048204721819424635,
|
| 25629 |
+
"loss": 16.4245,
|
| 25630 |
+
"step": 73200
|
| 25631 |
+
},
|
| 25632 |
+
{
|
| 25633 |
+
"epoch": 0.10846186207182598,
|
| 25634 |
+
"grad_norm": 6.71875,
|
| 25635 |
+
"learning_rate": 0.00048204227925923874,
|
| 25636 |
+
"loss": 16.4691,
|
| 25637 |
+
"step": 73220
|
| 25638 |
+
},
|
| 25639 |
+
{
|
| 25640 |
+
"epoch": 0.10849148836575437,
|
| 25641 |
+
"grad_norm": 6.375,
|
| 25642 |
+
"learning_rate": 0.0004820373403242312,
|
| 25643 |
+
"loss": 16.4718,
|
| 25644 |
+
"step": 73240
|
| 25645 |
+
},
|
| 25646 |
+
{
|
| 25647 |
+
"epoch": 0.10852111465968275,
|
| 25648 |
+
"grad_norm": 6.90625,
|
| 25649 |
+
"learning_rate": 0.00048203240138922364,
|
| 25650 |
+
"loss": 16.4101,
|
| 25651 |
+
"step": 73260
|
| 25652 |
+
},
|
| 25653 |
+
{
|
| 25654 |
+
"epoch": 0.10855074095361116,
|
| 25655 |
+
"grad_norm": 6.875,
|
| 25656 |
+
"learning_rate": 0.0004820274624542161,
|
| 25657 |
+
"loss": 16.4164,
|
| 25658 |
+
"step": 73280
|
| 25659 |
+
},
|
| 25660 |
+
{
|
| 25661 |
+
"epoch": 0.10858036724753954,
|
| 25662 |
+
"grad_norm": 6.6875,
|
| 25663 |
+
"learning_rate": 0.0004820225235192085,
|
| 25664 |
+
"loss": 16.4198,
|
| 25665 |
+
"step": 73300
|
| 25666 |
+
},
|
| 25667 |
+
{
|
| 25668 |
+
"epoch": 0.10860999354146793,
|
| 25669 |
+
"grad_norm": 7.25,
|
| 25670 |
+
"learning_rate": 0.000482017584584201,
|
| 25671 |
+
"loss": 16.3898,
|
| 25672 |
+
"step": 73320
|
| 25673 |
+
},
|
| 25674 |
+
{
|
| 25675 |
+
"epoch": 0.10863961983539631,
|
| 25676 |
+
"grad_norm": 6.59375,
|
| 25677 |
+
"learning_rate": 0.0004820126456491934,
|
| 25678 |
+
"loss": 16.4542,
|
| 25679 |
+
"step": 73340
|
| 25680 |
+
},
|
| 25681 |
+
{
|
| 25682 |
+
"epoch": 0.1086692461293247,
|
| 25683 |
+
"grad_norm": 6.5625,
|
| 25684 |
+
"learning_rate": 0.0004820077067141858,
|
| 25685 |
+
"loss": 16.4689,
|
| 25686 |
+
"step": 73360
|
| 25687 |
+
},
|
| 25688 |
+
{
|
| 25689 |
+
"epoch": 0.10869887242325309,
|
| 25690 |
+
"grad_norm": 7.8125,
|
| 25691 |
+
"learning_rate": 0.0004820027677791782,
|
| 25692 |
+
"loss": 16.3908,
|
| 25693 |
+
"step": 73380
|
| 25694 |
+
},
|
| 25695 |
+
{
|
| 25696 |
+
"epoch": 0.10872849871718147,
|
| 25697 |
+
"grad_norm": 7.625,
|
| 25698 |
+
"learning_rate": 0.0004819978288441707,
|
| 25699 |
+
"loss": 16.4518,
|
| 25700 |
+
"step": 73400
|
| 25701 |
+
},
|
| 25702 |
+
{
|
| 25703 |
+
"epoch": 0.10875812501110986,
|
| 25704 |
+
"grad_norm": 5.65625,
|
| 25705 |
+
"learning_rate": 0.0004819928899091631,
|
| 25706 |
+
"loss": 16.4547,
|
| 25707 |
+
"step": 73420
|
| 25708 |
+
},
|
| 25709 |
+
{
|
| 25710 |
+
"epoch": 0.10878775130503825,
|
| 25711 |
+
"grad_norm": 7.21875,
|
| 25712 |
+
"learning_rate": 0.00048198795097415556,
|
| 25713 |
+
"loss": 16.4573,
|
| 25714 |
+
"step": 73440
|
| 25715 |
+
},
|
| 25716 |
+
{
|
| 25717 |
+
"epoch": 0.10881737759896663,
|
| 25718 |
+
"grad_norm": 6.59375,
|
| 25719 |
+
"learning_rate": 0.00048198301203914795,
|
| 25720 |
+
"loss": 16.4254,
|
| 25721 |
+
"step": 73460
|
| 25722 |
+
},
|
| 25723 |
+
{
|
| 25724 |
+
"epoch": 0.10884700389289502,
|
| 25725 |
+
"grad_norm": 6.53125,
|
| 25726 |
+
"learning_rate": 0.00048197807310414046,
|
| 25727 |
+
"loss": 16.4479,
|
| 25728 |
+
"step": 73480
|
| 25729 |
+
},
|
| 25730 |
+
{
|
| 25731 |
+
"epoch": 0.1088766301868234,
|
| 25732 |
+
"grad_norm": 6.59375,
|
| 25733 |
+
"learning_rate": 0.00048197313416913285,
|
| 25734 |
+
"loss": 16.374,
|
| 25735 |
+
"step": 73500
|
| 25736 |
+
},
|
| 25737 |
+
{
|
| 25738 |
+
"epoch": 0.10890625648075179,
|
| 25739 |
+
"grad_norm": 7.0625,
|
| 25740 |
+
"learning_rate": 0.0004819681952341253,
|
| 25741 |
+
"loss": 16.4854,
|
| 25742 |
+
"step": 73520
|
| 25743 |
+
},
|
| 25744 |
+
{
|
| 25745 |
+
"epoch": 0.10893588277468018,
|
| 25746 |
+
"grad_norm": 6.5625,
|
| 25747 |
+
"learning_rate": 0.0004819632562991177,
|
| 25748 |
+
"loss": 16.373,
|
| 25749 |
+
"step": 73540
|
| 25750 |
+
},
|
| 25751 |
+
{
|
| 25752 |
+
"epoch": 0.10896550906860857,
|
| 25753 |
+
"grad_norm": 5.6875,
|
| 25754 |
+
"learning_rate": 0.00048195831736411014,
|
| 25755 |
+
"loss": 16.4403,
|
| 25756 |
+
"step": 73560
|
| 25757 |
+
},
|
| 25758 |
+
{
|
| 25759 |
+
"epoch": 0.10899513536253697,
|
| 25760 |
+
"grad_norm": 7.28125,
|
| 25761 |
+
"learning_rate": 0.0004819533784291026,
|
| 25762 |
+
"loss": 16.4399,
|
| 25763 |
+
"step": 73580
|
| 25764 |
+
},
|
| 25765 |
+
{
|
| 25766 |
+
"epoch": 0.10902476165646535,
|
| 25767 |
+
"grad_norm": 6.0,
|
| 25768 |
+
"learning_rate": 0.000481948439494095,
|
| 25769 |
+
"loss": 16.4257,
|
| 25770 |
+
"step": 73600
|
| 25771 |
+
},
|
| 25772 |
+
{
|
| 25773 |
+
"epoch": 0.10905438795039374,
|
| 25774 |
+
"grad_norm": 7.0,
|
| 25775 |
+
"learning_rate": 0.0004819435005590875,
|
| 25776 |
+
"loss": 16.3843,
|
| 25777 |
+
"step": 73620
|
| 25778 |
+
},
|
| 25779 |
+
{
|
| 25780 |
+
"epoch": 0.10908401424432213,
|
| 25781 |
+
"grad_norm": 6.5625,
|
| 25782 |
+
"learning_rate": 0.0004819385616240799,
|
| 25783 |
+
"loss": 16.4221,
|
| 25784 |
+
"step": 73640
|
| 25785 |
+
},
|
| 25786 |
+
{
|
| 25787 |
+
"epoch": 0.10911364053825051,
|
| 25788 |
+
"grad_norm": 6.90625,
|
| 25789 |
+
"learning_rate": 0.0004819336226890723,
|
| 25790 |
+
"loss": 16.4464,
|
| 25791 |
+
"step": 73660
|
| 25792 |
+
},
|
| 25793 |
+
{
|
| 25794 |
+
"epoch": 0.1091432668321789,
|
| 25795 |
+
"grad_norm": 6.875,
|
| 25796 |
+
"learning_rate": 0.0004819286837540647,
|
| 25797 |
+
"loss": 16.4325,
|
| 25798 |
+
"step": 73680
|
| 25799 |
+
},
|
| 25800 |
+
{
|
| 25801 |
+
"epoch": 0.10917289312610728,
|
| 25802 |
+
"grad_norm": 6.09375,
|
| 25803 |
+
"learning_rate": 0.0004819237448190572,
|
| 25804 |
+
"loss": 16.4222,
|
| 25805 |
+
"step": 73700
|
| 25806 |
+
},
|
| 25807 |
+
{
|
| 25808 |
+
"epoch": 0.10920251942003567,
|
| 25809 |
+
"grad_norm": 6.9375,
|
| 25810 |
+
"learning_rate": 0.0004819188058840496,
|
| 25811 |
+
"loss": 16.4236,
|
| 25812 |
+
"step": 73720
|
| 25813 |
+
},
|
| 25814 |
+
{
|
| 25815 |
+
"epoch": 0.10923214571396406,
|
| 25816 |
+
"grad_norm": 6.0625,
|
| 25817 |
+
"learning_rate": 0.00048191386694904206,
|
| 25818 |
+
"loss": 16.4719,
|
| 25819 |
+
"step": 73740
|
| 25820 |
+
},
|
| 25821 |
+
{
|
| 25822 |
+
"epoch": 0.10926177200789244,
|
| 25823 |
+
"grad_norm": 6.78125,
|
| 25824 |
+
"learning_rate": 0.00048190892801403445,
|
| 25825 |
+
"loss": 16.4062,
|
| 25826 |
+
"step": 73760
|
| 25827 |
+
},
|
| 25828 |
+
{
|
| 25829 |
+
"epoch": 0.10929139830182083,
|
| 25830 |
+
"grad_norm": 7.0,
|
| 25831 |
+
"learning_rate": 0.00048190398907902696,
|
| 25832 |
+
"loss": 16.4468,
|
| 25833 |
+
"step": 73780
|
| 25834 |
+
},
|
| 25835 |
+
{
|
| 25836 |
+
"epoch": 0.10932102459574922,
|
| 25837 |
+
"grad_norm": 6.6875,
|
| 25838 |
+
"learning_rate": 0.00048189905014401935,
|
| 25839 |
+
"loss": 16.4426,
|
| 25840 |
+
"step": 73800
|
| 25841 |
+
},
|
| 25842 |
+
{
|
| 25843 |
+
"epoch": 0.1093506508896776,
|
| 25844 |
+
"grad_norm": 6.625,
|
| 25845 |
+
"learning_rate": 0.0004818941112090118,
|
| 25846 |
+
"loss": 16.4042,
|
| 25847 |
+
"step": 73820
|
| 25848 |
+
},
|
| 25849 |
+
{
|
| 25850 |
+
"epoch": 0.10938027718360599,
|
| 25851 |
+
"grad_norm": 6.53125,
|
| 25852 |
+
"learning_rate": 0.0004818891722740042,
|
| 25853 |
+
"loss": 16.489,
|
| 25854 |
+
"step": 73840
|
| 25855 |
+
},
|
| 25856 |
+
{
|
| 25857 |
+
"epoch": 0.10940990347753438,
|
| 25858 |
+
"grad_norm": 7.25,
|
| 25859 |
+
"learning_rate": 0.0004818842333389967,
|
| 25860 |
+
"loss": 16.4129,
|
| 25861 |
+
"step": 73860
|
| 25862 |
+
},
|
| 25863 |
+
{
|
| 25864 |
+
"epoch": 0.10943952977146276,
|
| 25865 |
+
"grad_norm": 6.59375,
|
| 25866 |
+
"learning_rate": 0.0004818792944039891,
|
| 25867 |
+
"loss": 16.4828,
|
| 25868 |
+
"step": 73880
|
| 25869 |
+
},
|
| 25870 |
+
{
|
| 25871 |
+
"epoch": 0.10946915606539116,
|
| 25872 |
+
"grad_norm": 6.03125,
|
| 25873 |
+
"learning_rate": 0.0004818743554689815,
|
| 25874 |
+
"loss": 16.4081,
|
| 25875 |
+
"step": 73900
|
| 25876 |
+
},
|
| 25877 |
+
{
|
| 25878 |
+
"epoch": 0.10949878235931955,
|
| 25879 |
+
"grad_norm": 6.40625,
|
| 25880 |
+
"learning_rate": 0.000481869416533974,
|
| 25881 |
+
"loss": 16.441,
|
| 25882 |
+
"step": 73920
|
| 25883 |
+
},
|
| 25884 |
+
{
|
| 25885 |
+
"epoch": 0.10952840865324794,
|
| 25886 |
+
"grad_norm": 6.96875,
|
| 25887 |
+
"learning_rate": 0.0004818644775989664,
|
| 25888 |
+
"loss": 16.4013,
|
| 25889 |
+
"step": 73940
|
| 25890 |
+
},
|
| 25891 |
+
{
|
| 25892 |
+
"epoch": 0.10955803494717632,
|
| 25893 |
+
"grad_norm": 7.46875,
|
| 25894 |
+
"learning_rate": 0.0004818595386639588,
|
| 25895 |
+
"loss": 16.4102,
|
| 25896 |
+
"step": 73960
|
| 25897 |
+
},
|
| 25898 |
+
{
|
| 25899 |
+
"epoch": 0.10958766124110471,
|
| 25900 |
+
"grad_norm": 7.09375,
|
| 25901 |
+
"learning_rate": 0.0004818545997289512,
|
| 25902 |
+
"loss": 16.3879,
|
| 25903 |
+
"step": 73980
|
| 25904 |
+
},
|
| 25905 |
+
{
|
| 25906 |
+
"epoch": 0.1096172875350331,
|
| 25907 |
+
"grad_norm": 7.0,
|
| 25908 |
+
"learning_rate": 0.0004818496607939437,
|
| 25909 |
+
"loss": 16.3994,
|
| 25910 |
+
"step": 74000
|
| 25911 |
}
|
| 25912 |
],
|
| 25913 |
"logging_steps": 20,
|
|
|
|
| 25927 |
"attributes": {}
|
| 25928 |
}
|
| 25929 |
},
|
| 25930 |
+
"total_flos": 5.440780396085746e+19,
|
| 25931 |
"train_batch_size": 48,
|
| 25932 |
"trial_name": null,
|
| 25933 |
"trial_params": null
|