Training in progress, step 2300, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 2066752
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d403900b6e4e06d1060ea96c9f9125452e44e25ecb0fe98a4888dab20918096a
|
| 3 |
size 2066752
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4121235
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:27d100c984f8d641346f5da5c506557f408847bc183236db48c58f564b4d2d81
|
| 3 |
size 4121235
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14391
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:60fe173f9860062ebc60b002a64ae72dc915d76f9849b9cb85632a7a607221b5
|
| 3 |
size 14391
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1401
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2e16073f3e3321f4e6a7e2a6eca78556f1d80fe032e948e4721dde289f8623b3
|
| 3 |
size 1401
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 100,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -15584,6 +15584,714 @@
|
|
| 15584 |
"eval_samples_per_second": 1.719,
|
| 15585 |
"eval_steps_per_second": 0.215,
|
| 15586 |
"step": 2200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15587 |
}
|
| 15588 |
],
|
| 15589 |
"logging_steps": 1,
|
|
@@ -15603,7 +16311,7 @@
|
|
| 15603 |
"attributes": {}
|
| 15604 |
}
|
| 15605 |
},
|
| 15606 |
-
"total_flos":
|
| 15607 |
"train_batch_size": 1,
|
| 15608 |
"trial_name": null,
|
| 15609 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.09933488814027813,
|
| 6 |
"eval_steps": 100,
|
| 7 |
+
"global_step": 2300,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 15584 |
"eval_samples_per_second": 1.719,
|
| 15585 |
"eval_steps_per_second": 0.215,
|
| 15586 |
"step": 2200
|
| 15587 |
+
},
|
| 15588 |
+
{
|
| 15589 |
+
"epoch": 0.09505916904206617,
|
| 15590 |
+
"grad_norm": 0.9140625,
|
| 15591 |
+
"learning_rate": 0.0009944730436502519,
|
| 15592 |
+
"loss": 8.6376,
|
| 15593 |
+
"step": 2201
|
| 15594 |
+
},
|
| 15595 |
+
{
|
| 15596 |
+
"epoch": 0.09510235812386629,
|
| 15597 |
+
"grad_norm": 0.515625,
|
| 15598 |
+
"learning_rate": 0.0009944624498326824,
|
| 15599 |
+
"loss": 8.4071,
|
| 15600 |
+
"step": 2202
|
| 15601 |
+
},
|
| 15602 |
+
{
|
| 15603 |
+
"epoch": 0.09514554720566641,
|
| 15604 |
+
"grad_norm": 0.484375,
|
| 15605 |
+
"learning_rate": 0.0009944518459284934,
|
| 15606 |
+
"loss": 8.4233,
|
| 15607 |
+
"step": 2203
|
| 15608 |
+
},
|
| 15609 |
+
{
|
| 15610 |
+
"epoch": 0.09518873628746653,
|
| 15611 |
+
"grad_norm": 0.70703125,
|
| 15612 |
+
"learning_rate": 0.000994441231937901,
|
| 15613 |
+
"loss": 8.4952,
|
| 15614 |
+
"step": 2204
|
| 15615 |
+
},
|
| 15616 |
+
{
|
| 15617 |
+
"epoch": 0.09523192536926665,
|
| 15618 |
+
"grad_norm": 0.67578125,
|
| 15619 |
+
"learning_rate": 0.0009944306078611223,
|
| 15620 |
+
"loss": 8.4472,
|
| 15621 |
+
"step": 2205
|
| 15622 |
+
},
|
| 15623 |
+
{
|
| 15624 |
+
"epoch": 0.09527511445106678,
|
| 15625 |
+
"grad_norm": 0.8359375,
|
| 15626 |
+
"learning_rate": 0.0009944199736983733,
|
| 15627 |
+
"loss": 8.186,
|
| 15628 |
+
"step": 2206
|
| 15629 |
+
},
|
| 15630 |
+
{
|
| 15631 |
+
"epoch": 0.0953183035328669,
|
| 15632 |
+
"grad_norm": 0.455078125,
|
| 15633 |
+
"learning_rate": 0.0009944093294498714,
|
| 15634 |
+
"loss": 8.4121,
|
| 15635 |
+
"step": 2207
|
| 15636 |
+
},
|
| 15637 |
+
{
|
| 15638 |
+
"epoch": 0.09536149261466702,
|
| 15639 |
+
"grad_norm": 0.58984375,
|
| 15640 |
+
"learning_rate": 0.0009943986751158335,
|
| 15641 |
+
"loss": 8.2663,
|
| 15642 |
+
"step": 2208
|
| 15643 |
+
},
|
| 15644 |
+
{
|
| 15645 |
+
"epoch": 0.09540468169646714,
|
| 15646 |
+
"grad_norm": 0.640625,
|
| 15647 |
+
"learning_rate": 0.000994388010696477,
|
| 15648 |
+
"loss": 8.202,
|
| 15649 |
+
"step": 2209
|
| 15650 |
+
},
|
| 15651 |
+
{
|
| 15652 |
+
"epoch": 0.09544787077826726,
|
| 15653 |
+
"grad_norm": 0.59375,
|
| 15654 |
+
"learning_rate": 0.0009943773361920198,
|
| 15655 |
+
"loss": 8.2491,
|
| 15656 |
+
"step": 2210
|
| 15657 |
+
},
|
| 15658 |
+
{
|
| 15659 |
+
"epoch": 0.09549105986006738,
|
| 15660 |
+
"grad_norm": 0.515625,
|
| 15661 |
+
"learning_rate": 0.000994366651602679,
|
| 15662 |
+
"loss": 8.4205,
|
| 15663 |
+
"step": 2211
|
| 15664 |
+
},
|
| 15665 |
+
{
|
| 15666 |
+
"epoch": 0.0955342489418675,
|
| 15667 |
+
"grad_norm": 0.6640625,
|
| 15668 |
+
"learning_rate": 0.0009943559569286732,
|
| 15669 |
+
"loss": 8.2114,
|
| 15670 |
+
"step": 2212
|
| 15671 |
+
},
|
| 15672 |
+
{
|
| 15673 |
+
"epoch": 0.09557743802366761,
|
| 15674 |
+
"grad_norm": 0.65234375,
|
| 15675 |
+
"learning_rate": 0.0009943452521702198,
|
| 15676 |
+
"loss": 8.2514,
|
| 15677 |
+
"step": 2213
|
| 15678 |
+
},
|
| 15679 |
+
{
|
| 15680 |
+
"epoch": 0.09562062710546773,
|
| 15681 |
+
"grad_norm": 0.8046875,
|
| 15682 |
+
"learning_rate": 0.000994334537327538,
|
| 15683 |
+
"loss": 8.3806,
|
| 15684 |
+
"step": 2214
|
| 15685 |
+
},
|
| 15686 |
+
{
|
| 15687 |
+
"epoch": 0.09566381618726785,
|
| 15688 |
+
"grad_norm": 0.65625,
|
| 15689 |
+
"learning_rate": 0.000994323812400846,
|
| 15690 |
+
"loss": 8.4841,
|
| 15691 |
+
"step": 2215
|
| 15692 |
+
},
|
| 15693 |
+
{
|
| 15694 |
+
"epoch": 0.09570700526906797,
|
| 15695 |
+
"grad_norm": 0.6015625,
|
| 15696 |
+
"learning_rate": 0.0009943130773903623,
|
| 15697 |
+
"loss": 8.4155,
|
| 15698 |
+
"step": 2216
|
| 15699 |
+
},
|
| 15700 |
+
{
|
| 15701 |
+
"epoch": 0.0957501943508681,
|
| 15702 |
+
"grad_norm": 0.4609375,
|
| 15703 |
+
"learning_rate": 0.0009943023322963062,
|
| 15704 |
+
"loss": 8.2637,
|
| 15705 |
+
"step": 2217
|
| 15706 |
+
},
|
| 15707 |
+
{
|
| 15708 |
+
"epoch": 0.09579338343266822,
|
| 15709 |
+
"grad_norm": 0.60546875,
|
| 15710 |
+
"learning_rate": 0.000994291577118897,
|
| 15711 |
+
"loss": 8.4826,
|
| 15712 |
+
"step": 2218
|
| 15713 |
+
},
|
| 15714 |
+
{
|
| 15715 |
+
"epoch": 0.09583657251446834,
|
| 15716 |
+
"grad_norm": 0.5390625,
|
| 15717 |
+
"learning_rate": 0.000994280811858354,
|
| 15718 |
+
"loss": 8.322,
|
| 15719 |
+
"step": 2219
|
| 15720 |
+
},
|
| 15721 |
+
{
|
| 15722 |
+
"epoch": 0.09587976159626846,
|
| 15723 |
+
"grad_norm": 0.56640625,
|
| 15724 |
+
"learning_rate": 0.0009942700365148964,
|
| 15725 |
+
"loss": 8.1471,
|
| 15726 |
+
"step": 2220
|
| 15727 |
+
},
|
| 15728 |
+
{
|
| 15729 |
+
"epoch": 0.09592295067806858,
|
| 15730 |
+
"grad_norm": 0.625,
|
| 15731 |
+
"learning_rate": 0.0009942592510887448,
|
| 15732 |
+
"loss": 8.4959,
|
| 15733 |
+
"step": 2221
|
| 15734 |
+
},
|
| 15735 |
+
{
|
| 15736 |
+
"epoch": 0.0959661397598687,
|
| 15737 |
+
"grad_norm": 0.61328125,
|
| 15738 |
+
"learning_rate": 0.0009942484555801184,
|
| 15739 |
+
"loss": 8.2918,
|
| 15740 |
+
"step": 2222
|
| 15741 |
+
},
|
| 15742 |
+
{
|
| 15743 |
+
"epoch": 0.09600932884166882,
|
| 15744 |
+
"grad_norm": 1.1796875,
|
| 15745 |
+
"learning_rate": 0.000994237649989238,
|
| 15746 |
+
"loss": 8.5336,
|
| 15747 |
+
"step": 2223
|
| 15748 |
+
},
|
| 15749 |
+
{
|
| 15750 |
+
"epoch": 0.09605251792346894,
|
| 15751 |
+
"grad_norm": 0.494140625,
|
| 15752 |
+
"learning_rate": 0.0009942268343163237,
|
| 15753 |
+
"loss": 8.5094,
|
| 15754 |
+
"step": 2224
|
| 15755 |
+
},
|
| 15756 |
+
{
|
| 15757 |
+
"epoch": 0.09609570700526907,
|
| 15758 |
+
"grad_norm": 0.6171875,
|
| 15759 |
+
"learning_rate": 0.0009942160085615961,
|
| 15760 |
+
"loss": 8.1301,
|
| 15761 |
+
"step": 2225
|
| 15762 |
+
},
|
| 15763 |
+
{
|
| 15764 |
+
"epoch": 0.09613889608706919,
|
| 15765 |
+
"grad_norm": 0.60546875,
|
| 15766 |
+
"learning_rate": 0.0009942051727252765,
|
| 15767 |
+
"loss": 8.2714,
|
| 15768 |
+
"step": 2226
|
| 15769 |
+
},
|
| 15770 |
+
{
|
| 15771 |
+
"epoch": 0.09618208516886931,
|
| 15772 |
+
"grad_norm": 0.416015625,
|
| 15773 |
+
"learning_rate": 0.0009941943268075854,
|
| 15774 |
+
"loss": 8.4242,
|
| 15775 |
+
"step": 2227
|
| 15776 |
+
},
|
| 15777 |
+
{
|
| 15778 |
+
"epoch": 0.09622527425066943,
|
| 15779 |
+
"grad_norm": 0.6640625,
|
| 15780 |
+
"learning_rate": 0.0009941834708087445,
|
| 15781 |
+
"loss": 8.463,
|
| 15782 |
+
"step": 2228
|
| 15783 |
+
},
|
| 15784 |
+
{
|
| 15785 |
+
"epoch": 0.09626846333246955,
|
| 15786 |
+
"grad_norm": 0.4453125,
|
| 15787 |
+
"learning_rate": 0.0009941726047289748,
|
| 15788 |
+
"loss": 8.305,
|
| 15789 |
+
"step": 2229
|
| 15790 |
+
},
|
| 15791 |
+
{
|
| 15792 |
+
"epoch": 0.09631165241426967,
|
| 15793 |
+
"grad_norm": 0.58984375,
|
| 15794 |
+
"learning_rate": 0.0009941617285684982,
|
| 15795 |
+
"loss": 8.2656,
|
| 15796 |
+
"step": 2230
|
| 15797 |
+
},
|
| 15798 |
+
{
|
| 15799 |
+
"epoch": 0.09635484149606979,
|
| 15800 |
+
"grad_norm": 0.41796875,
|
| 15801 |
+
"learning_rate": 0.0009941508423275366,
|
| 15802 |
+
"loss": 8.3025,
|
| 15803 |
+
"step": 2231
|
| 15804 |
+
},
|
| 15805 |
+
{
|
| 15806 |
+
"epoch": 0.09639803057786991,
|
| 15807 |
+
"grad_norm": 0.5625,
|
| 15808 |
+
"learning_rate": 0.000994139946006312,
|
| 15809 |
+
"loss": 8.2322,
|
| 15810 |
+
"step": 2232
|
| 15811 |
+
},
|
| 15812 |
+
{
|
| 15813 |
+
"epoch": 0.09644121965967004,
|
| 15814 |
+
"grad_norm": 0.51171875,
|
| 15815 |
+
"learning_rate": 0.0009941290396050467,
|
| 15816 |
+
"loss": 8.2482,
|
| 15817 |
+
"step": 2233
|
| 15818 |
+
},
|
| 15819 |
+
{
|
| 15820 |
+
"epoch": 0.09648440874147016,
|
| 15821 |
+
"grad_norm": 0.56640625,
|
| 15822 |
+
"learning_rate": 0.000994118123123963,
|
| 15823 |
+
"loss": 8.2431,
|
| 15824 |
+
"step": 2234
|
| 15825 |
+
},
|
| 15826 |
+
{
|
| 15827 |
+
"epoch": 0.09652759782327028,
|
| 15828 |
+
"grad_norm": 0.8203125,
|
| 15829 |
+
"learning_rate": 0.000994107196563284,
|
| 15830 |
+
"loss": 8.7743,
|
| 15831 |
+
"step": 2235
|
| 15832 |
+
},
|
| 15833 |
+
{
|
| 15834 |
+
"epoch": 0.0965707869050704,
|
| 15835 |
+
"grad_norm": 1.046875,
|
| 15836 |
+
"learning_rate": 0.0009940962599232323,
|
| 15837 |
+
"loss": 7.9271,
|
| 15838 |
+
"step": 2236
|
| 15839 |
+
},
|
| 15840 |
+
{
|
| 15841 |
+
"epoch": 0.09661397598687052,
|
| 15842 |
+
"grad_norm": 0.5859375,
|
| 15843 |
+
"learning_rate": 0.000994085313204031,
|
| 15844 |
+
"loss": 8.279,
|
| 15845 |
+
"step": 2237
|
| 15846 |
+
},
|
| 15847 |
+
{
|
| 15848 |
+
"epoch": 0.09665716506867064,
|
| 15849 |
+
"grad_norm": 0.60546875,
|
| 15850 |
+
"learning_rate": 0.0009940743564059037,
|
| 15851 |
+
"loss": 8.4154,
|
| 15852 |
+
"step": 2238
|
| 15853 |
+
},
|
| 15854 |
+
{
|
| 15855 |
+
"epoch": 0.09670035415047076,
|
| 15856 |
+
"grad_norm": 0.79296875,
|
| 15857 |
+
"learning_rate": 0.0009940633895290732,
|
| 15858 |
+
"loss": 8.3311,
|
| 15859 |
+
"step": 2239
|
| 15860 |
+
},
|
| 15861 |
+
{
|
| 15862 |
+
"epoch": 0.09674354323227088,
|
| 15863 |
+
"grad_norm": 0.66796875,
|
| 15864 |
+
"learning_rate": 0.000994052412573764,
|
| 15865 |
+
"loss": 8.6155,
|
| 15866 |
+
"step": 2240
|
| 15867 |
+
},
|
| 15868 |
+
{
|
| 15869 |
+
"epoch": 0.096786732314071,
|
| 15870 |
+
"grad_norm": 0.56640625,
|
| 15871 |
+
"learning_rate": 0.0009940414255401996,
|
| 15872 |
+
"loss": 8.4563,
|
| 15873 |
+
"step": 2241
|
| 15874 |
+
},
|
| 15875 |
+
{
|
| 15876 |
+
"epoch": 0.09682992139587113,
|
| 15877 |
+
"grad_norm": 0.60546875,
|
| 15878 |
+
"learning_rate": 0.0009940304284286044,
|
| 15879 |
+
"loss": 8.4334,
|
| 15880 |
+
"step": 2242
|
| 15881 |
+
},
|
| 15882 |
+
{
|
| 15883 |
+
"epoch": 0.09687311047767125,
|
| 15884 |
+
"grad_norm": 0.7109375,
|
| 15885 |
+
"learning_rate": 0.0009940194212392022,
|
| 15886 |
+
"loss": 8.3314,
|
| 15887 |
+
"step": 2243
|
| 15888 |
+
},
|
| 15889 |
+
{
|
| 15890 |
+
"epoch": 0.09691629955947137,
|
| 15891 |
+
"grad_norm": 0.47265625,
|
| 15892 |
+
"learning_rate": 0.000994008403972218,
|
| 15893 |
+
"loss": 8.7214,
|
| 15894 |
+
"step": 2244
|
| 15895 |
+
},
|
| 15896 |
+
{
|
| 15897 |
+
"epoch": 0.09695948864127149,
|
| 15898 |
+
"grad_norm": 1.0625,
|
| 15899 |
+
"learning_rate": 0.0009939973766278766,
|
| 15900 |
+
"loss": 8.297,
|
| 15901 |
+
"step": 2245
|
| 15902 |
+
},
|
| 15903 |
+
{
|
| 15904 |
+
"epoch": 0.09700267772307161,
|
| 15905 |
+
"grad_norm": 0.59765625,
|
| 15906 |
+
"learning_rate": 0.0009939863392064029,
|
| 15907 |
+
"loss": 8.4925,
|
| 15908 |
+
"step": 2246
|
| 15909 |
+
},
|
| 15910 |
+
{
|
| 15911 |
+
"epoch": 0.09704586680487173,
|
| 15912 |
+
"grad_norm": 0.447265625,
|
| 15913 |
+
"learning_rate": 0.0009939752917080217,
|
| 15914 |
+
"loss": 8.2892,
|
| 15915 |
+
"step": 2247
|
| 15916 |
+
},
|
| 15917 |
+
{
|
| 15918 |
+
"epoch": 0.09708905588667185,
|
| 15919 |
+
"grad_norm": 0.69140625,
|
| 15920 |
+
"learning_rate": 0.0009939642341329586,
|
| 15921 |
+
"loss": 8.5275,
|
| 15922 |
+
"step": 2248
|
| 15923 |
+
},
|
| 15924 |
+
{
|
| 15925 |
+
"epoch": 0.09713224496847198,
|
| 15926 |
+
"grad_norm": 2.390625,
|
| 15927 |
+
"learning_rate": 0.0009939531664814392,
|
| 15928 |
+
"loss": 8.694,
|
| 15929 |
+
"step": 2249
|
| 15930 |
+
},
|
| 15931 |
+
{
|
| 15932 |
+
"epoch": 0.0971754340502721,
|
| 15933 |
+
"grad_norm": 0.453125,
|
| 15934 |
+
"learning_rate": 0.0009939420887536893,
|
| 15935 |
+
"loss": 8.4036,
|
| 15936 |
+
"step": 2250
|
| 15937 |
+
},
|
| 15938 |
+
{
|
| 15939 |
+
"epoch": 0.09721862313207222,
|
| 15940 |
+
"grad_norm": 0.66796875,
|
| 15941 |
+
"learning_rate": 0.0009939310009499348,
|
| 15942 |
+
"loss": 8.3543,
|
| 15943 |
+
"step": 2251
|
| 15944 |
+
},
|
| 15945 |
+
{
|
| 15946 |
+
"epoch": 0.09726181221387234,
|
| 15947 |
+
"grad_norm": 0.474609375,
|
| 15948 |
+
"learning_rate": 0.0009939199030704019,
|
| 15949 |
+
"loss": 8.3396,
|
| 15950 |
+
"step": 2252
|
| 15951 |
+
},
|
| 15952 |
+
{
|
| 15953 |
+
"epoch": 0.09730500129567246,
|
| 15954 |
+
"grad_norm": 0.84765625,
|
| 15955 |
+
"learning_rate": 0.0009939087951153168,
|
| 15956 |
+
"loss": 8.3102,
|
| 15957 |
+
"step": 2253
|
| 15958 |
+
},
|
| 15959 |
+
{
|
| 15960 |
+
"epoch": 0.09734819037747257,
|
| 15961 |
+
"grad_norm": 0.4921875,
|
| 15962 |
+
"learning_rate": 0.0009938976770849065,
|
| 15963 |
+
"loss": 8.236,
|
| 15964 |
+
"step": 2254
|
| 15965 |
+
},
|
| 15966 |
+
{
|
| 15967 |
+
"epoch": 0.09739137945927269,
|
| 15968 |
+
"grad_norm": 0.61328125,
|
| 15969 |
+
"learning_rate": 0.0009938865489793976,
|
| 15970 |
+
"loss": 8.1535,
|
| 15971 |
+
"step": 2255
|
| 15972 |
+
},
|
| 15973 |
+
{
|
| 15974 |
+
"epoch": 0.09743456854107281,
|
| 15975 |
+
"grad_norm": 0.4765625,
|
| 15976 |
+
"learning_rate": 0.000993875410799017,
|
| 15977 |
+
"loss": 8.3056,
|
| 15978 |
+
"step": 2256
|
| 15979 |
+
},
|
| 15980 |
+
{
|
| 15981 |
+
"epoch": 0.09747775762287293,
|
| 15982 |
+
"grad_norm": 0.53125,
|
| 15983 |
+
"learning_rate": 0.0009938642625439917,
|
| 15984 |
+
"loss": 8.2276,
|
| 15985 |
+
"step": 2257
|
| 15986 |
+
},
|
| 15987 |
+
{
|
| 15988 |
+
"epoch": 0.09752094670467305,
|
| 15989 |
+
"grad_norm": 0.482421875,
|
| 15990 |
+
"learning_rate": 0.0009938531042145498,
|
| 15991 |
+
"loss": 8.4162,
|
| 15992 |
+
"step": 2258
|
| 15993 |
+
},
|
| 15994 |
+
{
|
| 15995 |
+
"epoch": 0.09756413578647317,
|
| 15996 |
+
"grad_norm": 0.640625,
|
| 15997 |
+
"learning_rate": 0.0009938419358109182,
|
| 15998 |
+
"loss": 8.2504,
|
| 15999 |
+
"step": 2259
|
| 16000 |
+
},
|
| 16001 |
+
{
|
| 16002 |
+
"epoch": 0.0976073248682733,
|
| 16003 |
+
"grad_norm": 0.6171875,
|
| 16004 |
+
"learning_rate": 0.0009938307573333254,
|
| 16005 |
+
"loss": 8.595,
|
| 16006 |
+
"step": 2260
|
| 16007 |
+
},
|
| 16008 |
+
{
|
| 16009 |
+
"epoch": 0.09765051395007342,
|
| 16010 |
+
"grad_norm": 0.49609375,
|
| 16011 |
+
"learning_rate": 0.0009938195687819989,
|
| 16012 |
+
"loss": 8.3727,
|
| 16013 |
+
"step": 2261
|
| 16014 |
+
},
|
| 16015 |
+
{
|
| 16016 |
+
"epoch": 0.09769370303187354,
|
| 16017 |
+
"grad_norm": 0.48828125,
|
| 16018 |
+
"learning_rate": 0.0009938083701571672,
|
| 16019 |
+
"loss": 8.4693,
|
| 16020 |
+
"step": 2262
|
| 16021 |
+
},
|
| 16022 |
+
{
|
| 16023 |
+
"epoch": 0.09773689211367366,
|
| 16024 |
+
"grad_norm": 0.859375,
|
| 16025 |
+
"learning_rate": 0.0009937971614590586,
|
| 16026 |
+
"loss": 8.383,
|
| 16027 |
+
"step": 2263
|
| 16028 |
+
},
|
| 16029 |
+
{
|
| 16030 |
+
"epoch": 0.09778008119547378,
|
| 16031 |
+
"grad_norm": 1.0078125,
|
| 16032 |
+
"learning_rate": 0.0009937859426879018,
|
| 16033 |
+
"loss": 8.1874,
|
| 16034 |
+
"step": 2264
|
| 16035 |
+
},
|
| 16036 |
+
{
|
| 16037 |
+
"epoch": 0.0978232702772739,
|
| 16038 |
+
"grad_norm": 0.4453125,
|
| 16039 |
+
"learning_rate": 0.0009937747138439256,
|
| 16040 |
+
"loss": 8.5885,
|
| 16041 |
+
"step": 2265
|
| 16042 |
+
},
|
| 16043 |
+
{
|
| 16044 |
+
"epoch": 0.09786645935907402,
|
| 16045 |
+
"grad_norm": 0.69140625,
|
| 16046 |
+
"learning_rate": 0.000993763474927359,
|
| 16047 |
+
"loss": 8.2082,
|
| 16048 |
+
"step": 2266
|
| 16049 |
+
},
|
| 16050 |
+
{
|
| 16051 |
+
"epoch": 0.09790964844087414,
|
| 16052 |
+
"grad_norm": 0.515625,
|
| 16053 |
+
"learning_rate": 0.0009937522259384317,
|
| 16054 |
+
"loss": 8.4838,
|
| 16055 |
+
"step": 2267
|
| 16056 |
+
},
|
| 16057 |
+
{
|
| 16058 |
+
"epoch": 0.09795283752267427,
|
| 16059 |
+
"grad_norm": 0.48046875,
|
| 16060 |
+
"learning_rate": 0.0009937409668773728,
|
| 16061 |
+
"loss": 8.301,
|
| 16062 |
+
"step": 2268
|
| 16063 |
+
},
|
| 16064 |
+
{
|
| 16065 |
+
"epoch": 0.09799602660447439,
|
| 16066 |
+
"grad_norm": 0.58984375,
|
| 16067 |
+
"learning_rate": 0.000993729697744412,
|
| 16068 |
+
"loss": 8.5776,
|
| 16069 |
+
"step": 2269
|
| 16070 |
+
},
|
| 16071 |
+
{
|
| 16072 |
+
"epoch": 0.09803921568627451,
|
| 16073 |
+
"grad_norm": 0.91015625,
|
| 16074 |
+
"learning_rate": 0.000993718418539779,
|
| 16075 |
+
"loss": 8.8736,
|
| 16076 |
+
"step": 2270
|
| 16077 |
+
},
|
| 16078 |
+
{
|
| 16079 |
+
"epoch": 0.09808240476807463,
|
| 16080 |
+
"grad_norm": 0.61328125,
|
| 16081 |
+
"learning_rate": 0.0009937071292637043,
|
| 16082 |
+
"loss": 8.0556,
|
| 16083 |
+
"step": 2271
|
| 16084 |
+
},
|
| 16085 |
+
{
|
| 16086 |
+
"epoch": 0.09812559384987475,
|
| 16087 |
+
"grad_norm": 0.90625,
|
| 16088 |
+
"learning_rate": 0.000993695829916418,
|
| 16089 |
+
"loss": 7.9003,
|
| 16090 |
+
"step": 2272
|
| 16091 |
+
},
|
| 16092 |
+
{
|
| 16093 |
+
"epoch": 0.09816878293167487,
|
| 16094 |
+
"grad_norm": 0.63671875,
|
| 16095 |
+
"learning_rate": 0.0009936845204981505,
|
| 16096 |
+
"loss": 8.4324,
|
| 16097 |
+
"step": 2273
|
| 16098 |
+
},
|
| 16099 |
+
{
|
| 16100 |
+
"epoch": 0.098211972013475,
|
| 16101 |
+
"grad_norm": 0.51953125,
|
| 16102 |
+
"learning_rate": 0.0009936732010091328,
|
| 16103 |
+
"loss": 8.5961,
|
| 16104 |
+
"step": 2274
|
| 16105 |
+
},
|
| 16106 |
+
{
|
| 16107 |
+
"epoch": 0.09825516109527511,
|
| 16108 |
+
"grad_norm": 0.51171875,
|
| 16109 |
+
"learning_rate": 0.0009936618714495953,
|
| 16110 |
+
"loss": 8.6324,
|
| 16111 |
+
"step": 2275
|
| 16112 |
+
},
|
| 16113 |
+
{
|
| 16114 |
+
"epoch": 0.09829835017707524,
|
| 16115 |
+
"grad_norm": 0.70703125,
|
| 16116 |
+
"learning_rate": 0.0009936505318197694,
|
| 16117 |
+
"loss": 8.0815,
|
| 16118 |
+
"step": 2276
|
| 16119 |
+
},
|
| 16120 |
+
{
|
| 16121 |
+
"epoch": 0.09834153925887536,
|
| 16122 |
+
"grad_norm": 0.48828125,
|
| 16123 |
+
"learning_rate": 0.0009936391821198866,
|
| 16124 |
+
"loss": 8.584,
|
| 16125 |
+
"step": 2277
|
| 16126 |
+
},
|
| 16127 |
+
{
|
| 16128 |
+
"epoch": 0.09838472834067548,
|
| 16129 |
+
"grad_norm": 0.63671875,
|
| 16130 |
+
"learning_rate": 0.0009936278223501782,
|
| 16131 |
+
"loss": 8.3207,
|
| 16132 |
+
"step": 2278
|
| 16133 |
+
},
|
| 16134 |
+
{
|
| 16135 |
+
"epoch": 0.0984279174224756,
|
| 16136 |
+
"grad_norm": 0.70703125,
|
| 16137 |
+
"learning_rate": 0.0009936164525108761,
|
| 16138 |
+
"loss": 8.617,
|
| 16139 |
+
"step": 2279
|
| 16140 |
+
},
|
| 16141 |
+
{
|
| 16142 |
+
"epoch": 0.09847110650427572,
|
| 16143 |
+
"grad_norm": 0.50390625,
|
| 16144 |
+
"learning_rate": 0.000993605072602212,
|
| 16145 |
+
"loss": 8.7178,
|
| 16146 |
+
"step": 2280
|
| 16147 |
+
},
|
| 16148 |
+
{
|
| 16149 |
+
"epoch": 0.09851429558607584,
|
| 16150 |
+
"grad_norm": 0.515625,
|
| 16151 |
+
"learning_rate": 0.0009935936826244182,
|
| 16152 |
+
"loss": 8.3508,
|
| 16153 |
+
"step": 2281
|
| 16154 |
+
},
|
| 16155 |
+
{
|
| 16156 |
+
"epoch": 0.09855748466787596,
|
| 16157 |
+
"grad_norm": 0.7890625,
|
| 16158 |
+
"learning_rate": 0.000993582282577727,
|
| 16159 |
+
"loss": 9.0207,
|
| 16160 |
+
"step": 2282
|
| 16161 |
+
},
|
| 16162 |
+
{
|
| 16163 |
+
"epoch": 0.09860067374967608,
|
| 16164 |
+
"grad_norm": 0.5234375,
|
| 16165 |
+
"learning_rate": 0.0009935708724623708,
|
| 16166 |
+
"loss": 8.4822,
|
| 16167 |
+
"step": 2283
|
| 16168 |
+
},
|
| 16169 |
+
{
|
| 16170 |
+
"epoch": 0.0986438628314762,
|
| 16171 |
+
"grad_norm": 0.462890625,
|
| 16172 |
+
"learning_rate": 0.0009935594522785826,
|
| 16173 |
+
"loss": 8.537,
|
| 16174 |
+
"step": 2284
|
| 16175 |
+
},
|
| 16176 |
+
{
|
| 16177 |
+
"epoch": 0.09868705191327633,
|
| 16178 |
+
"grad_norm": 0.78515625,
|
| 16179 |
+
"learning_rate": 0.0009935480220265955,
|
| 16180 |
+
"loss": 7.9235,
|
| 16181 |
+
"step": 2285
|
| 16182 |
+
},
|
| 16183 |
+
{
|
| 16184 |
+
"epoch": 0.09873024099507645,
|
| 16185 |
+
"grad_norm": 0.66015625,
|
| 16186 |
+
"learning_rate": 0.0009935365817066422,
|
| 16187 |
+
"loss": 8.3552,
|
| 16188 |
+
"step": 2286
|
| 16189 |
+
},
|
| 16190 |
+
{
|
| 16191 |
+
"epoch": 0.09877343007687657,
|
| 16192 |
+
"grad_norm": 0.64453125,
|
| 16193 |
+
"learning_rate": 0.0009935251313189565,
|
| 16194 |
+
"loss": 8.199,
|
| 16195 |
+
"step": 2287
|
| 16196 |
+
},
|
| 16197 |
+
{
|
| 16198 |
+
"epoch": 0.09881661915867669,
|
| 16199 |
+
"grad_norm": 0.50390625,
|
| 16200 |
+
"learning_rate": 0.0009935136708637716,
|
| 16201 |
+
"loss": 8.3347,
|
| 16202 |
+
"step": 2288
|
| 16203 |
+
},
|
| 16204 |
+
{
|
| 16205 |
+
"epoch": 0.09885980824047681,
|
| 16206 |
+
"grad_norm": 0.61328125,
|
| 16207 |
+
"learning_rate": 0.0009935022003413217,
|
| 16208 |
+
"loss": 8.1595,
|
| 16209 |
+
"step": 2289
|
| 16210 |
+
},
|
| 16211 |
+
{
|
| 16212 |
+
"epoch": 0.09890299732227693,
|
| 16213 |
+
"grad_norm": 0.4453125,
|
| 16214 |
+
"learning_rate": 0.0009934907197518405,
|
| 16215 |
+
"loss": 8.3218,
|
| 16216 |
+
"step": 2290
|
| 16217 |
+
},
|
| 16218 |
+
{
|
| 16219 |
+
"epoch": 0.09894618640407706,
|
| 16220 |
+
"grad_norm": 0.4921875,
|
| 16221 |
+
"learning_rate": 0.0009934792290955622,
|
| 16222 |
+
"loss": 8.3675,
|
| 16223 |
+
"step": 2291
|
| 16224 |
+
},
|
| 16225 |
+
{
|
| 16226 |
+
"epoch": 0.09898937548587718,
|
| 16227 |
+
"grad_norm": 0.6953125,
|
| 16228 |
+
"learning_rate": 0.0009934677283727211,
|
| 16229 |
+
"loss": 8.1193,
|
| 16230 |
+
"step": 2292
|
| 16231 |
+
},
|
| 16232 |
+
{
|
| 16233 |
+
"epoch": 0.0990325645676773,
|
| 16234 |
+
"grad_norm": 0.5390625,
|
| 16235 |
+
"learning_rate": 0.000993456217583552,
|
| 16236 |
+
"loss": 8.5143,
|
| 16237 |
+
"step": 2293
|
| 16238 |
+
},
|
| 16239 |
+
{
|
| 16240 |
+
"epoch": 0.09907575364947742,
|
| 16241 |
+
"grad_norm": 0.72265625,
|
| 16242 |
+
"learning_rate": 0.0009934446967282899,
|
| 16243 |
+
"loss": 7.9545,
|
| 16244 |
+
"step": 2294
|
| 16245 |
+
},
|
| 16246 |
+
{
|
| 16247 |
+
"epoch": 0.09911894273127753,
|
| 16248 |
+
"grad_norm": 0.6796875,
|
| 16249 |
+
"learning_rate": 0.0009934331658071694,
|
| 16250 |
+
"loss": 8.4399,
|
| 16251 |
+
"step": 2295
|
| 16252 |
+
},
|
| 16253 |
+
{
|
| 16254 |
+
"epoch": 0.09916213181307765,
|
| 16255 |
+
"grad_norm": 0.66796875,
|
| 16256 |
+
"learning_rate": 0.000993421624820426,
|
| 16257 |
+
"loss": 8.3492,
|
| 16258 |
+
"step": 2296
|
| 16259 |
+
},
|
| 16260 |
+
{
|
| 16261 |
+
"epoch": 0.09920532089487777,
|
| 16262 |
+
"grad_norm": 0.75,
|
| 16263 |
+
"learning_rate": 0.0009934100737682952,
|
| 16264 |
+
"loss": 8.4884,
|
| 16265 |
+
"step": 2297
|
| 16266 |
+
},
|
| 16267 |
+
{
|
| 16268 |
+
"epoch": 0.09924850997667789,
|
| 16269 |
+
"grad_norm": 0.5703125,
|
| 16270 |
+
"learning_rate": 0.0009933985126510123,
|
| 16271 |
+
"loss": 8.0807,
|
| 16272 |
+
"step": 2298
|
| 16273 |
+
},
|
| 16274 |
+
{
|
| 16275 |
+
"epoch": 0.09929169905847801,
|
| 16276 |
+
"grad_norm": 0.6171875,
|
| 16277 |
+
"learning_rate": 0.0009933869414688132,
|
| 16278 |
+
"loss": 8.3986,
|
| 16279 |
+
"step": 2299
|
| 16280 |
+
},
|
| 16281 |
+
{
|
| 16282 |
+
"epoch": 0.09933488814027813,
|
| 16283 |
+
"grad_norm": 0.478515625,
|
| 16284 |
+
"learning_rate": 0.0009933753602219342,
|
| 16285 |
+
"loss": 8.331,
|
| 16286 |
+
"step": 2300
|
| 16287 |
+
},
|
| 16288 |
+
{
|
| 16289 |
+
"epoch": 0.09933488814027813,
|
| 16290 |
+
"eval_loss": 8.394790649414062,
|
| 16291 |
+
"eval_runtime": 14.132,
|
| 16292 |
+
"eval_samples_per_second": 1.698,
|
| 16293 |
+
"eval_steps_per_second": 0.212,
|
| 16294 |
+
"step": 2300
|
| 16295 |
}
|
| 16296 |
],
|
| 16297 |
"logging_steps": 1,
|
|
|
|
| 16311 |
"attributes": {}
|
| 16312 |
}
|
| 16313 |
},
|
| 16314 |
+
"total_flos": 7352067686400.0,
|
| 16315 |
"train_batch_size": 1,
|
| 16316 |
"trial_name": null,
|
| 16317 |
"trial_params": null
|