Training in progress, step 12000, checkpoint
- last-checkpoint/model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +2 -2
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +2 -2
- last-checkpoint/rng_state_4.pth +2 -2
- last-checkpoint/rng_state_5.pth +2 -2
- last-checkpoint/rng_state_6.pth +2 -2
- last-checkpoint/rng_state_7.pth +2 -2
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +702 -2
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:b198e43aac6859985d7c9cb18c2860594033b256136cc0b0e915d584614c895c
 size 737582948
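Each of these checkpoint files is stored through Git LFS, so the commit only rewrites a three-line pointer: the spec version, the sha256 digest of the object, and its size in bytes. As a minimal sketch (the paths are illustrative, not part of this commit), a downloaded object can be verified against its pointer like this:

import hashlib

def parse_lfs_pointer(path):
    # Each pointer line is "key value"; build a {key: value} dict.
    with open(path) as f:
        return dict(line.strip().split(" ", 1) for line in f if line.strip())

def verify_lfs_object(pointer_path, object_path):
    # Compare the object's sha256 digest and byte count to the pointer.
    pointer = parse_lfs_pointer(pointer_path)
    expected = pointer["oid"].split(":", 1)[1]  # strip the "sha256:" prefix
    digest, size = hashlib.sha256(), 0
    with open(object_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
            size += len(chunk)
    return digest.hexdigest() == expected and size == int(pointer["size"])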
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:64ec40cbe8543eb2855a915aee21dd1f77e088ec666a079a32133adde9da7af4
 size 1475256250
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:ae238f666763a7993ec652c03f60677cb3de9003ea7ee1bc1dac41c2065a9c25
+size 15920
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:d5ab92b6e335feba5c54de89db3c87b707994c34e8ae94b68ceaf1c0e44c4698
 size 15920
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:2632a9a94c203af7029ed1ba1b5fb0c1a8126e97bbd443fb5dba117f62e54913
 size 15920
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:add85b850cbfe5b8cf5c4f2e6f71a61a7d77d12000e589671d2903fa92c8b4c3
+size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:7645a2766e30c501c310ca2b1baf3bd1106ec431388b54ca1a7f2f6cc5531dbd
+size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:cad911d83e601a147b8872de9ba34bade0b9837051abcd270f992115bb282348
+size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:36c25b50d403f4b10a416c3c4294b21a8f3a8f0d8b348d5a613cd951ffd7b66c
+size 15920
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:f4aa3be7aba10932fe3b181dbc7c647b64be83ff98de84fe2b9cd6b26e86aafe
+size 15920
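The eight rng_state_{0..7}.pth files correspond to the eight data-parallel ranks of this run: the transformers Trainer saves one RNG snapshot per process so a resumed run reproduces the same shuffling and dropout masks on every rank. The small size differences (15920 vs. 15984 bytes) are ordinary serialization variance. A minimal inspection sketch, assuming the usual layout of a torch-serialized dict of generator states:

import torch

# RNG snapshots are pickled dicts of generator states; recent torch
# versions may require weights_only=False to unpickle non-tensor entries.
state = torch.load("last-checkpoint/rng_state_0.pth",
                   map_location="cpu", weights_only=False)
print(sorted(state.keys()))  # typically includes cpu/cuda/numpy/python states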
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:4297fd04c7ed2579ce63e17f4b5a76a418be4b988ee50b810797fa07318b7ac1
 size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch":
+  "epoch": 6.106922942290251,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 12000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -7707,6 +7707,706 @@
       "learning_rate": 4.98437306883095e-05,
       "loss": 0.682,
       "step": 11000
+    },
+    {
+      "epoch": 5.603350888637847,
+      "grad_norm": 0.10835061222314835,
+      "learning_rate": 4.9843588625298863e-05,
+      "loss": 0.6829,
+      "step": 11010
+    },
+    {
+      "epoch": 5.6084424573183345,
+      "grad_norm": 0.1209336370229721,
+      "learning_rate": 4.9843446562288236e-05,
+      "loss": 0.6808,
+      "step": 11020
+    },
+    {
+      "epoch": 5.613534025998822,
+      "grad_norm": 0.12438962608575821,
+      "learning_rate": 4.984330449927761e-05,
+      "loss": 0.6768,
+      "step": 11030
+    },
+    {
+      "epoch": 5.618625594679311,
+      "grad_norm": 0.1364268809556961,
+      "learning_rate": 4.984316243626698e-05,
+      "loss": 0.6781,
+      "step": 11040
+    },
+    {
+      "epoch": 5.623717163359799,
+      "grad_norm": 0.11569849401712418,
+      "learning_rate": 4.9843020373256356e-05,
+      "loss": 0.6825,
+      "step": 11050
+    },
+    {
+      "epoch": 5.628808732040287,
+      "grad_norm": 0.10072596371173859,
+      "learning_rate": 4.984287831024573e-05,
+      "loss": 0.6764,
+      "step": 11060
+    },
+    {
+      "epoch": 5.633900300720775,
+      "grad_norm": 0.15180449187755585,
+      "learning_rate": 4.98427362472351e-05,
+      "loss": 0.6782,
+      "step": 11070
+    },
+    {
+      "epoch": 5.638991869401263,
+      "grad_norm": 0.14204277098178864,
+      "learning_rate": 4.9842594184224475e-05,
+      "loss": 0.6806,
+      "step": 11080
+    },
+    {
+      "epoch": 5.6440834380817515,
+      "grad_norm": 0.12409929186105728,
+      "learning_rate": 4.984245212121385e-05,
+      "loss": 0.6806,
+      "step": 11090
+    },
+    {
+      "epoch": 5.649175006762239,
+      "grad_norm": 0.1692194640636444,
+      "learning_rate": 4.9842310058203215e-05,
+      "loss": 0.6723,
+      "step": 11100
+    },
+    {
+      "epoch": 5.654266575442728,
+      "grad_norm": 0.2566402852535248,
+      "learning_rate": 4.984216799519259e-05,
+      "loss": 0.6845,
+      "step": 11110
+    },
+    {
+      "epoch": 5.659358144123216,
+      "grad_norm": 0.13745322823524475,
+      "learning_rate": 4.984202593218196e-05,
+      "loss": 0.6748,
+      "step": 11120
+    },
+    {
+      "epoch": 5.664449712803704,
+      "grad_norm": 0.16598811745643616,
+      "learning_rate": 4.9841883869171334e-05,
+      "loss": 0.6798,
+      "step": 11130
+    },
+    {
+      "epoch": 5.669541281484192,
+      "grad_norm": 0.13570183515548706,
+      "learning_rate": 4.984174180616071e-05,
+      "loss": 0.6797,
+      "step": 11140
+    },
+    {
+      "epoch": 5.674632850164681,
+      "grad_norm": 0.17549622058868408,
+      "learning_rate": 4.984159974315008e-05,
+      "loss": 0.6773,
+      "step": 11150
+    },
+    {
+      "epoch": 5.6797244188451685,
+      "grad_norm": 0.15479332208633423,
+      "learning_rate": 4.984145768013945e-05,
+      "loss": 0.6795,
+      "step": 11160
+    },
+    {
+      "epoch": 5.684815987525656,
+      "grad_norm": 0.1562296450138092,
+      "learning_rate": 4.9841315617128826e-05,
+      "loss": 0.6803,
+      "step": 11170
+    },
+    {
+      "epoch": 5.689907556206145,
+      "grad_norm": 0.13014480471611023,
+      "learning_rate": 4.98411735541182e-05,
+      "loss": 0.6793,
+      "step": 11180
+    },
+    {
+      "epoch": 5.694999124886633,
+      "grad_norm": 0.1577223241329193,
+      "learning_rate": 4.984103149110757e-05,
+      "loss": 0.6845,
+      "step": 11190
+    },
+    {
+      "epoch": 5.700090693567121,
+      "grad_norm": 0.14906632900238037,
+      "learning_rate": 4.9840889428096946e-05,
+      "loss": 0.6771,
+      "step": 11200
+    },
+    {
+      "epoch": 5.705182262247609,
+      "grad_norm": 0.15042632818222046,
+      "learning_rate": 4.984074736508632e-05,
+      "loss": 0.6737,
+      "step": 11210
+    },
+    {
+      "epoch": 5.710273830928098,
+      "grad_norm": 0.1530093252658844,
+      "learning_rate": 4.9840605302075685e-05,
+      "loss": 0.6804,
+      "step": 11220
+    },
+    {
+      "epoch": 5.715365399608586,
+      "grad_norm": 0.18300846219062805,
+      "learning_rate": 4.984046323906506e-05,
+      "loss": 0.6752,
+      "step": 11230
+    },
+    {
+      "epoch": 5.720456968289074,
+      "grad_norm": 0.14398545026779175,
+      "learning_rate": 4.9840321176054424e-05,
+      "loss": 0.6793,
+      "step": 11240
+    },
+    {
+      "epoch": 5.725548536969562,
+      "grad_norm": 0.12745435535907745,
+      "learning_rate": 4.98401791130438e-05,
+      "loss": 0.6765,
+      "step": 11250
+    },
+    {
+      "epoch": 5.73064010565005,
+      "grad_norm": 0.15162277221679688,
+      "learning_rate": 4.984003705003317e-05,
+      "loss": 0.6744,
+      "step": 11260
+    },
+    {
+      "epoch": 5.735731674330538,
+      "grad_norm": 0.12970998883247375,
+      "learning_rate": 4.9839894987022544e-05,
+      "loss": 0.6818,
+      "step": 11270
+    },
+    {
+      "epoch": 5.740823243011026,
+      "grad_norm": 0.1195228323340416,
+      "learning_rate": 4.983975292401192e-05,
+      "loss": 0.6749,
+      "step": 11280
+    },
+    {
+      "epoch": 5.745914811691515,
+      "grad_norm": 0.14821238815784454,
+      "learning_rate": 4.983961086100129e-05,
+      "loss": 0.6759,
+      "step": 11290
+    },
+    {
+      "epoch": 5.751006380372003,
+      "grad_norm": 0.18345175683498383,
+      "learning_rate": 4.983946879799066e-05,
+      "loss": 0.6736,
+      "step": 11300
+    },
+    {
+      "epoch": 5.75609794905249,
+      "grad_norm": 0.14165613055229187,
+      "learning_rate": 4.9839326734980036e-05,
+      "loss": 0.6777,
+      "step": 11310
+    },
+    {
+      "epoch": 5.761189517732979,
+      "grad_norm": 0.16045770049095154,
+      "learning_rate": 4.983918467196941e-05,
+      "loss": 0.678,
+      "step": 11320
+    },
+    {
+      "epoch": 5.766281086413467,
+      "grad_norm": 0.1490974873304367,
+      "learning_rate": 4.983904260895878e-05,
+      "loss": 0.68,
+      "step": 11330
+    },
+    {
+      "epoch": 5.7713726550939555,
+      "grad_norm": 0.11064887046813965,
+      "learning_rate": 4.9838900545948156e-05,
+      "loss": 0.6832,
+      "step": 11340
+    },
+    {
+      "epoch": 5.776464223774443,
+      "grad_norm": 0.11848734319210052,
+      "learning_rate": 4.983875848293753e-05,
+      "loss": 0.6792,
+      "step": 11350
+    },
+    {
+      "epoch": 5.781555792454932,
+      "grad_norm": 0.1246313750743866,
+      "learning_rate": 4.9838616419926895e-05,
+      "loss": 0.6794,
+      "step": 11360
+    },
+    {
+      "epoch": 5.78664736113542,
+      "grad_norm": 0.17359575629234314,
+      "learning_rate": 4.983847435691627e-05,
+      "loss": 0.6762,
+      "step": 11370
+    },
+    {
+      "epoch": 5.791738929815908,
+      "grad_norm": 0.16471154987812042,
+      "learning_rate": 4.983833229390564e-05,
+      "loss": 0.6742,
+      "step": 11380
+    },
+    {
+      "epoch": 5.796830498496396,
+      "grad_norm": 0.1479930430650711,
+      "learning_rate": 4.9838190230895014e-05,
+      "loss": 0.678,
+      "step": 11390
+    },
+    {
+      "epoch": 5.801922067176884,
+      "grad_norm": 0.11385341733694077,
+      "learning_rate": 4.983804816788439e-05,
+      "loss": 0.6791,
+      "step": 11400
+    },
+    {
+      "epoch": 5.8070136358573725,
+      "grad_norm": 0.13574256002902985,
+      "learning_rate": 4.983790610487376e-05,
+      "loss": 0.6795,
+      "step": 11410
+    },
+    {
+      "epoch": 5.81210520453786,
+      "grad_norm": 0.1701575517654419,
+      "learning_rate": 4.9837764041863134e-05,
+      "loss": 0.6791,
+      "step": 11420
+    },
+    {
+      "epoch": 5.817196773218349,
+      "grad_norm": 0.11972179263830185,
+      "learning_rate": 4.98376219788525e-05,
+      "loss": 0.6802,
+      "step": 11430
+    },
+    {
+      "epoch": 5.822288341898837,
+      "grad_norm": 0.15830230712890625,
+      "learning_rate": 4.983747991584187e-05,
+      "loss": 0.6761,
+      "step": 11440
+    },
+    {
+      "epoch": 5.827379910579325,
+      "grad_norm": 0.16592001914978027,
+      "learning_rate": 4.9837337852831246e-05,
+      "loss": 0.6768,
+      "step": 11450
+    },
+    {
+      "epoch": 5.832471479259813,
+      "grad_norm": 0.21496979892253876,
+      "learning_rate": 4.983719578982062e-05,
+      "loss": 0.6783,
+      "step": 11460
+    },
+    {
+      "epoch": 5.837563047940302,
+      "grad_norm": 0.14850680530071259,
+      "learning_rate": 4.983705372680999e-05,
+      "loss": 0.6781,
+      "step": 11470
+    },
+    {
+      "epoch": 5.8426546166207896,
+      "grad_norm": 0.12256158143281937,
+      "learning_rate": 4.9836911663799365e-05,
+      "loss": 0.6776,
+      "step": 11480
+    },
+    {
+      "epoch": 5.847746185301277,
+      "grad_norm": 0.14311592280864716,
+      "learning_rate": 4.983676960078874e-05,
+      "loss": 0.6717,
+      "step": 11490
+    },
+    {
+      "epoch": 5.852837753981766,
+      "grad_norm": 0.1648699939250946,
+      "learning_rate": 4.9836627537778105e-05,
+      "loss": 0.6779,
+      "step": 11500
+    },
+    {
+      "epoch": 5.857929322662254,
+      "grad_norm": 0.13590501248836517,
+      "learning_rate": 4.983648547476748e-05,
+      "loss": 0.6824,
+      "step": 11510
+    },
+    {
+      "epoch": 5.863020891342742,
+      "grad_norm": 0.13972793519496918,
+      "learning_rate": 4.983634341175685e-05,
+      "loss": 0.679,
+      "step": 11520
+    },
+    {
+      "epoch": 5.86811246002323,
+      "grad_norm": 0.11360618472099304,
+      "learning_rate": 4.9836201348746224e-05,
+      "loss": 0.6746,
+      "step": 11530
+    },
+    {
+      "epoch": 5.873204028703718,
+      "grad_norm": 0.14063167572021484,
+      "learning_rate": 4.98360592857356e-05,
+      "loss": 0.6818,
+      "step": 11540
+    },
+    {
+      "epoch": 5.878295597384207,
+      "grad_norm": 0.12393573671579361,
+      "learning_rate": 4.983591722272497e-05,
+      "loss": 0.6771,
+      "step": 11550
+    },
+    {
+      "epoch": 5.883387166064694,
+      "grad_norm": 0.12383928149938583,
+      "learning_rate": 4.9835775159714344e-05,
+      "loss": 0.6807,
+      "step": 11560
+    },
+    {
+      "epoch": 5.888478734745183,
+      "grad_norm": 0.11464569717645645,
+      "learning_rate": 4.983563309670372e-05,
+      "loss": 0.6823,
+      "step": 11570
+    },
+    {
+      "epoch": 5.893570303425671,
+      "grad_norm": 0.15896569192409515,
+      "learning_rate": 4.983549103369309e-05,
+      "loss": 0.678,
+      "step": 11580
+    },
+    {
+      "epoch": 5.898661872106159,
+      "grad_norm": 0.11153749376535416,
+      "learning_rate": 4.983534897068246e-05,
+      "loss": 0.6799,
+      "step": 11590
+    },
+    {
+      "epoch": 5.903753440786647,
+      "grad_norm": 0.13557817041873932,
+      "learning_rate": 4.9835206907671836e-05,
+      "loss": 0.678,
+      "step": 11600
+    },
+    {
+      "epoch": 5.908845009467136,
+      "grad_norm": 0.12681804597377777,
+      "learning_rate": 4.98350648446612e-05,
+      "loss": 0.6853,
+      "step": 11610
+    },
+    {
+      "epoch": 5.913936578147624,
+      "grad_norm": 0.11007581651210785,
+      "learning_rate": 4.9834922781650575e-05,
+      "loss": 0.6799,
+      "step": 11620
+    },
+    {
+      "epoch": 5.919028146828111,
+      "grad_norm": 0.14073921740055084,
+      "learning_rate": 4.983478071863995e-05,
+      "loss": 0.6809,
+      "step": 11630
+    },
+    {
+      "epoch": 5.9241197155086,
+      "grad_norm": 0.17294389009475708,
+      "learning_rate": 4.9834638655629315e-05,
+      "loss": 0.677,
+      "step": 11640
+    },
+    {
+      "epoch": 5.929211284189088,
+      "grad_norm": 0.11901852488517761,
+      "learning_rate": 4.983449659261869e-05,
+      "loss": 0.6814,
+      "step": 11650
+    },
+    {
+      "epoch": 5.9343028528695765,
+      "grad_norm": 0.1563209444284439,
+      "learning_rate": 4.983435452960806e-05,
+      "loss": 0.6803,
+      "step": 11660
+    },
+    {
+      "epoch": 5.939394421550064,
+      "grad_norm": 0.1763051152229309,
+      "learning_rate": 4.9834212466597434e-05,
+      "loss": 0.6713,
+      "step": 11670
+    },
+    {
+      "epoch": 5.944485990230553,
+      "grad_norm": 0.1412787139415741,
+      "learning_rate": 4.983407040358681e-05,
+      "loss": 0.6791,
+      "step": 11680
+    },
+    {
+      "epoch": 5.949577558911041,
+      "grad_norm": 0.13946793973445892,
+      "learning_rate": 4.983392834057618e-05,
+      "loss": 0.674,
+      "step": 11690
+    },
+    {
+      "epoch": 5.954669127591529,
+      "grad_norm": 0.1848699301481247,
+      "learning_rate": 4.9833786277565553e-05,
+      "loss": 0.6785,
+      "step": 11700
+    },
+    {
+      "epoch": 5.959760696272017,
+      "grad_norm": 0.14714594185352325,
+      "learning_rate": 4.9833644214554927e-05,
+      "loss": 0.6764,
+      "step": 11710
+    },
+    {
+      "epoch": 5.964852264952505,
+      "grad_norm": 0.14410807192325592,
+      "learning_rate": 4.98335021515443e-05,
+      "loss": 0.6755,
+      "step": 11720
+    },
+    {
+      "epoch": 5.9699438336329935,
+      "grad_norm": 0.11196265369653702,
+      "learning_rate": 4.983336008853367e-05,
+      "loss": 0.6801,
+      "step": 11730
+    },
+    {
+      "epoch": 5.975035402313481,
+      "grad_norm": 0.14931631088256836,
+      "learning_rate": 4.9833218025523046e-05,
+      "loss": 0.6761,
+      "step": 11740
+    },
+    {
+      "epoch": 5.98012697099397,
+      "grad_norm": 0.1235998123884201,
+      "learning_rate": 4.983307596251241e-05,
+      "loss": 0.6816,
+      "step": 11750
+    },
+    {
+      "epoch": 5.985218539674458,
+      "grad_norm": 0.14235694706439972,
+      "learning_rate": 4.9832933899501785e-05,
+      "loss": 0.6784,
+      "step": 11760
+    },
+    {
+      "epoch": 5.9903101083549455,
+      "grad_norm": 0.11291839182376862,
+      "learning_rate": 4.983279183649116e-05,
+      "loss": 0.6857,
+      "step": 11770
+    },
+    {
+      "epoch": 5.995401677035434,
+      "grad_norm": 0.12273520231246948,
+      "learning_rate": 4.983264977348053e-05,
+      "loss": 0.6801,
+      "step": 11780
+    },
+    {
+      "epoch": 6.0,
+      "grad_norm": 0.025783156976103783,
+      "learning_rate": 4.9832507710469905e-05,
+      "loss": 0.6142,
+      "step": 11790
+    },
+    {
+      "epoch": 6.005091568680488,
+      "grad_norm": 0.1227310448884964,
+      "learning_rate": 4.983236564745928e-05,
+      "loss": 0.679,
+      "step": 11800
+    },
+    {
+      "epoch": 6.010183137360976,
+      "grad_norm": 0.14122678339481354,
+      "learning_rate": 4.983222358444865e-05,
+      "loss": 0.677,
+      "step": 11810
+    },
+    {
+      "epoch": 6.015274706041464,
+      "grad_norm": 0.14405541121959686,
+      "learning_rate": 4.9832081521438024e-05,
+      "loss": 0.6799,
+      "step": 11820
+    },
+    {
+      "epoch": 6.020366274721953,
+      "grad_norm": 0.18694424629211426,
+      "learning_rate": 4.98319394584274e-05,
+      "loss": 0.675,
+      "step": 11830
+    },
+    {
+      "epoch": 6.025457843402441,
+      "grad_norm": 0.1961718052625656,
+      "learning_rate": 4.983179739541677e-05,
+      "loss": 0.6819,
+      "step": 11840
+    },
+    {
+      "epoch": 6.030549412082929,
+      "grad_norm": 0.1102224811911583,
+      "learning_rate": 4.9831655332406137e-05,
+      "loss": 0.682,
+      "step": 11850
+    },
+    {
+      "epoch": 6.035640980763417,
+      "grad_norm": 0.1295260190963745,
+      "learning_rate": 4.983151326939551e-05,
+      "loss": 0.6794,
+      "step": 11860
+    },
+    {
+      "epoch": 6.040732549443905,
+      "grad_norm": 0.12580661475658417,
+      "learning_rate": 4.983137120638488e-05,
+      "loss": 0.6791,
+      "step": 11870
+    },
+    {
+      "epoch": 6.0458241181243935,
+      "grad_norm": 0.1288338154554367,
+      "learning_rate": 4.9831229143374256e-05,
+      "loss": 0.6805,
+      "step": 11880
+    },
+    {
+      "epoch": 6.050915686804881,
+      "grad_norm": 0.1211671456694603,
+      "learning_rate": 4.983108708036362e-05,
+      "loss": 0.6764,
+      "step": 11890
+    },
+    {
+      "epoch": 6.05600725548537,
+      "grad_norm": 0.15219536423683167,
+      "learning_rate": 4.9830945017352995e-05,
+      "loss": 0.6806,
+      "step": 11900
+    },
+    {
+      "epoch": 6.061098824165858,
+      "grad_norm": 0.12759484350681305,
+      "learning_rate": 4.983080295434237e-05,
+      "loss": 0.676,
+      "step": 11910
+    },
+    {
+      "epoch": 6.066190392846346,
+      "grad_norm": 0.1949695497751236,
+      "learning_rate": 4.983066089133174e-05,
+      "loss": 0.6832,
+      "step": 11920
+    },
+    {
+      "epoch": 6.071281961526834,
+      "grad_norm": 0.11879277229309082,
+      "learning_rate": 4.9830518828321115e-05,
+      "loss": 0.6781,
+      "step": 11930
+    },
+    {
+      "epoch": 6.076373530207323,
+      "grad_norm": 0.12636293470859528,
+      "learning_rate": 4.983037676531049e-05,
+      "loss": 0.6774,
+      "step": 11940
+    },
+    {
+      "epoch": 6.0814650988878105,
+      "grad_norm": 0.13675157725811005,
+      "learning_rate": 4.983023470229986e-05,
+      "loss": 0.6789,
+      "step": 11950
+    },
+    {
+      "epoch": 6.086556667568298,
+      "grad_norm": 0.13322140276432037,
+      "learning_rate": 4.9830092639289234e-05,
+      "loss": 0.6805,
+      "step": 11960
+    },
+    {
+      "epoch": 6.091648236248787,
+      "grad_norm": 0.1352871060371399,
+      "learning_rate": 4.982995057627861e-05,
+      "loss": 0.6808,
+      "step": 11970
+    },
+    {
+      "epoch": 6.096739804929275,
+      "grad_norm": 0.14976170659065247,
+      "learning_rate": 4.982980851326798e-05,
+      "loss": 0.6775,
+      "step": 11980
+    },
+    {
+      "epoch": 6.101831373609763,
+      "grad_norm": 0.1250462532043457,
+      "learning_rate": 4.982966645025735e-05,
+      "loss": 0.6782,
+      "step": 11990
+    },
+    {
+      "epoch": 6.106922942290251,
+      "grad_norm": 0.16815803945064545,
+      "learning_rate": 4.9829524387246726e-05,
+      "loss": 0.6721,
+      "step": 12000
     }
   ],
   "logging_steps": 10,
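The appended objects are the trainer's per-logging-interval records: with "logging_steps": 10, each carries the epoch, gradient norm, learning rate, and training loss for steps 11010 through 12000. Assuming the standard transformers trainer_state.json schema, where these records live in a log_history list, the loss curve can be pulled out of the checkpoint like this:

import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

# Keep only entries that carry a training loss (eval entries may differ).
records = [e for e in state["log_history"] if "loss" in e]
steps = [e["step"] for e in records]
losses = [e["loss"] for e in records]

# The last record matches this commit's final entry (step 12000).
print(state["global_step"], steps[-1], losses[-1])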