Training in progress, step 6200, checkpoint
Browse files- last-checkpoint/optimizer_0/.metadata +0 -0
- last-checkpoint/optimizer_0/__0_0.distcp +1 -1
- last-checkpoint/optimizer_0/__1_0.distcp +1 -1
- last-checkpoint/optimizer_0/__2_0.distcp +1 -1
- last-checkpoint/optimizer_0/__3_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/.metadata +0 -0
- last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +763 -3
last-checkpoint/optimizer_0/.metadata
CHANGED
|
Binary files a/last-checkpoint/optimizer_0/.metadata and b/last-checkpoint/optimizer_0/.metadata differ
|
|
|
last-checkpoint/optimizer_0/__0_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13934748
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9532cf3853865f83aa4b1512fed11a043caac16c7c7a479336cb00c08c47445f
|
| 3 |
size 13934748
|
last-checkpoint/optimizer_0/__1_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13999412
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ac0306f8cd79071439fb2e032b6a794dfe130b78d3f6139dacf123dfc6184db8
|
| 3 |
size 13999412
|
last-checkpoint/optimizer_0/__2_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13990904
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8952265544fc4294d6fa38bb32c8013d07436ac0fa10a7ef59f2d03aaf69a899
|
| 3 |
size 13990904
|
last-checkpoint/optimizer_0/__3_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13990904
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:74cac162bd00e61ab073a6b6fa81138d15f540573e2730c348646239c0af2746
|
| 3 |
size 13990904
|
last-checkpoint/pytorch_model_fsdp_0/.metadata
CHANGED
|
Binary files a/last-checkpoint/pytorch_model_fsdp_0/.metadata and b/last-checkpoint/pytorch_model_fsdp_0/.metadata differ
|
|
|
last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:79f9fd3ffa5c298f49aa683a89f30a3b293edf8a4bf04e3e2e1304208647e606
|
| 3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b62917f83638a2302f8bdb8e4696e57f59c8864664078b94923b1e2952d78862
|
| 3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3dddadf1f078604529c0f4d51b0dfabc290ef123390e4b641aa10c7584948cc1
|
| 3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:21fb7db76e3758690c774743f26cd5ccb3de7c9e9ec9421fb6347ba964f73792
|
| 3 |
size 6966784
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9ebcdd9cb3a00187b7caf8ccddabd7425b6b74eafab1a8a7e286f4cf2c1e0dc5
|
| 3 |
size 14960
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3802beb66fc35db4df22557b4497b6a8fdfdf3e582059b4fe079309c7d84ad1a
|
| 3 |
size 14960
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:61d911caf90f35f3e5e63bf349703d8ac88e88dcfb0f587f0a27fb4ec2d5b04b
|
| 3 |
size 14960
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b4faf604ceb02aaa7b878afc6f9935dd3d58f0bba74657b78471494e5a2ee20b
|
| 3 |
size 14960
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:529be97fb31f3c3cb5a6124f64514f96e9dc11d13d1ad58796326c25a10ede28
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 20,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -46379,6 +46379,766 @@
|
|
| 46379 |
"eval_samples_per_second": 5.898,
|
| 46380 |
"eval_steps_per_second": 0.203,
|
| 46381 |
"step": 6100
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46382 |
}
|
| 46383 |
],
|
| 46384 |
"logging_steps": 1,
|
|
@@ -46398,7 +47158,7 @@
|
|
| 46398 |
"attributes": {}
|
| 46399 |
}
|
| 46400 |
},
|
| 46401 |
-
"total_flos": 1.
|
| 46402 |
"train_batch_size": 8,
|
| 46403 |
"trial_name": null,
|
| 46404 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.9147915898192549,
|
| 5 |
"eval_steps": 20,
|
| 6 |
+
"global_step": 6200,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 46379 |
"eval_samples_per_second": 5.898,
|
| 46380 |
"eval_steps_per_second": 0.203,
|
| 46381 |
"step": 6100
|
| 46382 |
+
},
|
| 46383 |
+
{
|
| 46384 |
+
"epoch": 0.9001844337882701,
|
| 46385 |
+
"grad_norm": 1.5873862504959106,
|
| 46386 |
+
"learning_rate": 6.001410145373998e-07,
|
| 46387 |
+
"loss": 0.0303,
|
| 46388 |
+
"step": 6101
|
| 46389 |
+
},
|
| 46390 |
+
{
|
| 46391 |
+
"epoch": 0.900331980818886,
|
| 46392 |
+
"grad_norm": 2.3204448223114014,
|
| 46393 |
+
"learning_rate": 5.983846732267118e-07,
|
| 46394 |
+
"loss": 0.0738,
|
| 46395 |
+
"step": 6102
|
| 46396 |
+
},
|
| 46397 |
+
{
|
| 46398 |
+
"epoch": 0.900479527849502,
|
| 46399 |
+
"grad_norm": 3.2990872859954834,
|
| 46400 |
+
"learning_rate": 5.966308264242837e-07,
|
| 46401 |
+
"loss": 0.0545,
|
| 46402 |
+
"step": 6103
|
| 46403 |
+
},
|
| 46404 |
+
{
|
| 46405 |
+
"epoch": 0.9006270748801181,
|
| 46406 |
+
"grad_norm": 2.091925859451294,
|
| 46407 |
+
"learning_rate": 5.948794745954655e-07,
|
| 46408 |
+
"loss": 0.0608,
|
| 46409 |
+
"step": 6104
|
| 46410 |
+
},
|
| 46411 |
+
{
|
| 46412 |
+
"epoch": 0.9007746219107341,
|
| 46413 |
+
"grad_norm": 2.3603947162628174,
|
| 46414 |
+
"learning_rate": 5.931306182049335e-07,
|
| 46415 |
+
"loss": 0.0288,
|
| 46416 |
+
"step": 6105
|
| 46417 |
+
},
|
| 46418 |
+
{
|
| 46419 |
+
"epoch": 0.90092216894135,
|
| 46420 |
+
"grad_norm": 3.0553927421569824,
|
| 46421 |
+
"learning_rate": 5.913842577167117e-07,
|
| 46422 |
+
"loss": 0.0304,
|
| 46423 |
+
"step": 6106
|
| 46424 |
+
},
|
| 46425 |
+
{
|
| 46426 |
+
"epoch": 0.9010697159719661,
|
| 46427 |
+
"grad_norm": 1.18839693069458,
|
| 46428 |
+
"learning_rate": 5.896403935941564e-07,
|
| 46429 |
+
"loss": 0.0255,
|
| 46430 |
+
"step": 6107
|
| 46431 |
+
},
|
| 46432 |
+
{
|
| 46433 |
+
"epoch": 0.9012172630025821,
|
| 46434 |
+
"grad_norm": 0.8999655842781067,
|
| 46435 |
+
"learning_rate": 5.878990262999628e-07,
|
| 46436 |
+
"loss": 0.0247,
|
| 46437 |
+
"step": 6108
|
| 46438 |
+
},
|
| 46439 |
+
{
|
| 46440 |
+
"epoch": 0.9013648100331981,
|
| 46441 |
+
"grad_norm": 1.7418278455734253,
|
| 46442 |
+
"learning_rate": 5.861601562961639e-07,
|
| 46443 |
+
"loss": 0.0533,
|
| 46444 |
+
"step": 6109
|
| 46445 |
+
},
|
| 46446 |
+
{
|
| 46447 |
+
"epoch": 0.901512357063814,
|
| 46448 |
+
"grad_norm": 1.063058853149414,
|
| 46449 |
+
"learning_rate": 5.844237840441291e-07,
|
| 46450 |
+
"loss": 0.0156,
|
| 46451 |
+
"step": 6110
|
| 46452 |
+
},
|
| 46453 |
+
{
|
| 46454 |
+
"epoch": 0.9016599040944301,
|
| 46455 |
+
"grad_norm": 4.956036567687988,
|
| 46456 |
+
"learning_rate": 5.826899100045669e-07,
|
| 46457 |
+
"loss": 0.1087,
|
| 46458 |
+
"step": 6111
|
| 46459 |
+
},
|
| 46460 |
+
{
|
| 46461 |
+
"epoch": 0.9018074511250461,
|
| 46462 |
+
"grad_norm": 0.6496356725692749,
|
| 46463 |
+
"learning_rate": 5.809585346375235e-07,
|
| 46464 |
+
"loss": 0.0248,
|
| 46465 |
+
"step": 6112
|
| 46466 |
+
},
|
| 46467 |
+
{
|
| 46468 |
+
"epoch": 0.9019549981556622,
|
| 46469 |
+
"grad_norm": 4.678279876708984,
|
| 46470 |
+
"learning_rate": 5.792296584023782e-07,
|
| 46471 |
+
"loss": 0.0501,
|
| 46472 |
+
"step": 6113
|
| 46473 |
+
},
|
| 46474 |
+
{
|
| 46475 |
+
"epoch": 0.9021025451862781,
|
| 46476 |
+
"grad_norm": 2.635258674621582,
|
| 46477 |
+
"learning_rate": 5.775032817578486e-07,
|
| 46478 |
+
"loss": 0.0709,
|
| 46479 |
+
"step": 6114
|
| 46480 |
+
},
|
| 46481 |
+
{
|
| 46482 |
+
"epoch": 0.9022500922168941,
|
| 46483 |
+
"grad_norm": 1.1114413738250732,
|
| 46484 |
+
"learning_rate": 5.757794051619936e-07,
|
| 46485 |
+
"loss": 0.0233,
|
| 46486 |
+
"step": 6115
|
| 46487 |
+
},
|
| 46488 |
+
{
|
| 46489 |
+
"epoch": 0.9023976392475102,
|
| 46490 |
+
"grad_norm": 2.946363925933838,
|
| 46491 |
+
"learning_rate": 5.740580290722042e-07,
|
| 46492 |
+
"loss": 0.0664,
|
| 46493 |
+
"step": 6116
|
| 46494 |
+
},
|
| 46495 |
+
{
|
| 46496 |
+
"epoch": 0.9025451862781262,
|
| 46497 |
+
"grad_norm": 3.6926958560943604,
|
| 46498 |
+
"learning_rate": 5.723391539452061e-07,
|
| 46499 |
+
"loss": 0.049,
|
| 46500 |
+
"step": 6117
|
| 46501 |
+
},
|
| 46502 |
+
{
|
| 46503 |
+
"epoch": 0.9026927333087421,
|
| 46504 |
+
"grad_norm": 3.990233898162842,
|
| 46505 |
+
"learning_rate": 5.70622780237069e-07,
|
| 46506 |
+
"loss": 0.1301,
|
| 46507 |
+
"step": 6118
|
| 46508 |
+
},
|
| 46509 |
+
{
|
| 46510 |
+
"epoch": 0.9028402803393581,
|
| 46511 |
+
"grad_norm": 1.862289547920227,
|
| 46512 |
+
"learning_rate": 5.689089084031896e-07,
|
| 46513 |
+
"loss": 0.0485,
|
| 46514 |
+
"step": 6119
|
| 46515 |
+
},
|
| 46516 |
+
{
|
| 46517 |
+
"epoch": 0.9029878273699742,
|
| 46518 |
+
"grad_norm": 1.8196097612380981,
|
| 46519 |
+
"learning_rate": 5.671975388983086e-07,
|
| 46520 |
+
"loss": 0.0271,
|
| 46521 |
+
"step": 6120
|
| 46522 |
+
},
|
| 46523 |
+
{
|
| 46524 |
+
"epoch": 0.9029878273699742,
|
| 46525 |
+
"eval_accuracy": 0.9782923299565847,
|
| 46526 |
+
"eval_f1": 0.9629629629629629,
|
| 46527 |
+
"eval_loss": 0.05541698634624481,
|
| 46528 |
+
"eval_precision": 0.9798994974874372,
|
| 46529 |
+
"eval_recall": 0.9466019417475728,
|
| 46530 |
+
"eval_runtime": 48.5394,
|
| 46531 |
+
"eval_samples_per_second": 5.995,
|
| 46532 |
+
"eval_steps_per_second": 0.206,
|
| 46533 |
+
"step": 6120
|
| 46534 |
+
},
|
| 46535 |
+
{
|
| 46536 |
+
"epoch": 0.9031353744005902,
|
| 46537 |
+
"grad_norm": 0.9414036273956299,
|
| 46538 |
+
"learning_rate": 5.654886721764997e-07,
|
| 46539 |
+
"loss": 0.0153,
|
| 46540 |
+
"step": 6121
|
| 46541 |
+
},
|
| 46542 |
+
{
|
| 46543 |
+
"epoch": 0.9032829214312061,
|
| 46544 |
+
"grad_norm": 1.2081336975097656,
|
| 46545 |
+
"learning_rate": 5.637823086911698e-07,
|
| 46546 |
+
"loss": 0.0251,
|
| 46547 |
+
"step": 6122
|
| 46548 |
+
},
|
| 46549 |
+
{
|
| 46550 |
+
"epoch": 0.9034304684618222,
|
| 46551 |
+
"grad_norm": 2.001443386077881,
|
| 46552 |
+
"learning_rate": 5.620784488950681e-07,
|
| 46553 |
+
"loss": 0.0558,
|
| 46554 |
+
"step": 6123
|
| 46555 |
+
},
|
| 46556 |
+
{
|
| 46557 |
+
"epoch": 0.9035780154924382,
|
| 46558 |
+
"grad_norm": 1.7640726566314697,
|
| 46559 |
+
"learning_rate": 5.603770932402719e-07,
|
| 46560 |
+
"loss": 0.0503,
|
| 46561 |
+
"step": 6124
|
| 46562 |
+
},
|
| 46563 |
+
{
|
| 46564 |
+
"epoch": 0.9037255625230542,
|
| 46565 |
+
"grad_norm": 5.103485107421875,
|
| 46566 |
+
"learning_rate": 5.586782421781989e-07,
|
| 46567 |
+
"loss": 0.1603,
|
| 46568 |
+
"step": 6125
|
| 46569 |
+
},
|
| 46570 |
+
{
|
| 46571 |
+
"epoch": 0.9038731095536703,
|
| 46572 |
+
"grad_norm": 1.4745298624038696,
|
| 46573 |
+
"learning_rate": 5.569818961596041e-07,
|
| 46574 |
+
"loss": 0.0552,
|
| 46575 |
+
"step": 6126
|
| 46576 |
+
},
|
| 46577 |
+
{
|
| 46578 |
+
"epoch": 0.9040206565842862,
|
| 46579 |
+
"grad_norm": 1.9164541959762573,
|
| 46580 |
+
"learning_rate": 5.552880556345719e-07,
|
| 46581 |
+
"loss": 0.0409,
|
| 46582 |
+
"step": 6127
|
| 46583 |
+
},
|
| 46584 |
+
{
|
| 46585 |
+
"epoch": 0.9041682036149022,
|
| 46586 |
+
"grad_norm": 3.4288718700408936,
|
| 46587 |
+
"learning_rate": 5.535967210525239e-07,
|
| 46588 |
+
"loss": 0.0773,
|
| 46589 |
+
"step": 6128
|
| 46590 |
+
},
|
| 46591 |
+
{
|
| 46592 |
+
"epoch": 0.9043157506455183,
|
| 46593 |
+
"grad_norm": 1.621910572052002,
|
| 46594 |
+
"learning_rate": 5.519078928622212e-07,
|
| 46595 |
+
"loss": 0.0666,
|
| 46596 |
+
"step": 6129
|
| 46597 |
+
},
|
| 46598 |
+
{
|
| 46599 |
+
"epoch": 0.9044632976761343,
|
| 46600 |
+
"grad_norm": 1.874854564666748,
|
| 46601 |
+
"learning_rate": 5.502215715117553e-07,
|
| 46602 |
+
"loss": 0.0426,
|
| 46603 |
+
"step": 6130
|
| 46604 |
+
},
|
| 46605 |
+
{
|
| 46606 |
+
"epoch": 0.9046108447067502,
|
| 46607 |
+
"grad_norm": 3.9866039752960205,
|
| 46608 |
+
"learning_rate": 5.485377574485528e-07,
|
| 46609 |
+
"loss": 0.0918,
|
| 46610 |
+
"step": 6131
|
| 46611 |
+
},
|
| 46612 |
+
{
|
| 46613 |
+
"epoch": 0.9047583917373663,
|
| 46614 |
+
"grad_norm": 2.3686044216156006,
|
| 46615 |
+
"learning_rate": 5.468564511193786e-07,
|
| 46616 |
+
"loss": 0.0882,
|
| 46617 |
+
"step": 6132
|
| 46618 |
+
},
|
| 46619 |
+
{
|
| 46620 |
+
"epoch": 0.9049059387679823,
|
| 46621 |
+
"grad_norm": 2.9734747409820557,
|
| 46622 |
+
"learning_rate": 5.451776529703256e-07,
|
| 46623 |
+
"loss": 0.1109,
|
| 46624 |
+
"step": 6133
|
| 46625 |
+
},
|
| 46626 |
+
{
|
| 46627 |
+
"epoch": 0.9050534857985983,
|
| 46628 |
+
"grad_norm": 2.4147839546203613,
|
| 46629 |
+
"learning_rate": 5.435013634468289e-07,
|
| 46630 |
+
"loss": 0.0677,
|
| 46631 |
+
"step": 6134
|
| 46632 |
+
},
|
| 46633 |
+
{
|
| 46634 |
+
"epoch": 0.9052010328292143,
|
| 46635 |
+
"grad_norm": 4.090640068054199,
|
| 46636 |
+
"learning_rate": 5.418275829936537e-07,
|
| 46637 |
+
"loss": 0.0467,
|
| 46638 |
+
"step": 6135
|
| 46639 |
+
},
|
| 46640 |
+
{
|
| 46641 |
+
"epoch": 0.9053485798598303,
|
| 46642 |
+
"grad_norm": 7.092474460601807,
|
| 46643 |
+
"learning_rate": 5.401563120548991e-07,
|
| 46644 |
+
"loss": 0.0388,
|
| 46645 |
+
"step": 6136
|
| 46646 |
+
},
|
| 46647 |
+
{
|
| 46648 |
+
"epoch": 0.9054961268904463,
|
| 46649 |
+
"grad_norm": 2.077030897140503,
|
| 46650 |
+
"learning_rate": 5.384875510740007e-07,
|
| 46651 |
+
"loss": 0.0505,
|
| 46652 |
+
"step": 6137
|
| 46653 |
+
},
|
| 46654 |
+
{
|
| 46655 |
+
"epoch": 0.9056436739210624,
|
| 46656 |
+
"grad_norm": 2.185776710510254,
|
| 46657 |
+
"learning_rate": 5.368213004937262e-07,
|
| 46658 |
+
"loss": 0.0583,
|
| 46659 |
+
"step": 6138
|
| 46660 |
+
},
|
| 46661 |
+
{
|
| 46662 |
+
"epoch": 0.9057912209516783,
|
| 46663 |
+
"grad_norm": 2.0845303535461426,
|
| 46664 |
+
"learning_rate": 5.351575607561766e-07,
|
| 46665 |
+
"loss": 0.0499,
|
| 46666 |
+
"step": 6139
|
| 46667 |
+
},
|
| 46668 |
+
{
|
| 46669 |
+
"epoch": 0.9059387679822943,
|
| 46670 |
+
"grad_norm": 1.5722860097885132,
|
| 46671 |
+
"learning_rate": 5.334963323027919e-07,
|
| 46672 |
+
"loss": 0.0405,
|
| 46673 |
+
"step": 6140
|
| 46674 |
+
},
|
| 46675 |
+
{
|
| 46676 |
+
"epoch": 0.9059387679822943,
|
| 46677 |
+
"eval_accuracy": 0.9782923299565847,
|
| 46678 |
+
"eval_f1": 0.9629629629629629,
|
| 46679 |
+
"eval_loss": 0.05552350729703903,
|
| 46680 |
+
"eval_precision": 0.9798994974874372,
|
| 46681 |
+
"eval_recall": 0.9466019417475728,
|
| 46682 |
+
"eval_runtime": 49.1374,
|
| 46683 |
+
"eval_samples_per_second": 5.922,
|
| 46684 |
+
"eval_steps_per_second": 0.204,
|
| 46685 |
+
"step": 6140
|
| 46686 |
+
},
|
| 46687 |
+
{
|
| 46688 |
+
"epoch": 0.9060863150129104,
|
| 46689 |
+
"grad_norm": 4.133938789367676,
|
| 46690 |
+
"learning_rate": 5.318376155743387e-07,
|
| 46691 |
+
"loss": 0.0862,
|
| 46692 |
+
"step": 6141
|
| 46693 |
+
},
|
| 46694 |
+
{
|
| 46695 |
+
"epoch": 0.9062338620435264,
|
| 46696 |
+
"grad_norm": 1.9547992944717407,
|
| 46697 |
+
"learning_rate": 5.301814110109205e-07,
|
| 46698 |
+
"loss": 0.0653,
|
| 46699 |
+
"step": 6142
|
| 46700 |
+
},
|
| 46701 |
+
{
|
| 46702 |
+
"epoch": 0.9063814090741423,
|
| 46703 |
+
"grad_norm": 3.051151990890503,
|
| 46704 |
+
"learning_rate": 5.285277190519744e-07,
|
| 46705 |
+
"loss": 0.0737,
|
| 46706 |
+
"step": 6143
|
| 46707 |
+
},
|
| 46708 |
+
{
|
| 46709 |
+
"epoch": 0.9065289561047584,
|
| 46710 |
+
"grad_norm": 1.4388315677642822,
|
| 46711 |
+
"learning_rate": 5.268765401362718e-07,
|
| 46712 |
+
"loss": 0.0179,
|
| 46713 |
+
"step": 6144
|
| 46714 |
+
},
|
| 46715 |
+
{
|
| 46716 |
+
"epoch": 0.9066765031353744,
|
| 46717 |
+
"grad_norm": 1.3435120582580566,
|
| 46718 |
+
"learning_rate": 5.252278747019146e-07,
|
| 46719 |
+
"loss": 0.0307,
|
| 46720 |
+
"step": 6145
|
| 46721 |
+
},
|
| 46722 |
+
{
|
| 46723 |
+
"epoch": 0.9068240501659904,
|
| 46724 |
+
"grad_norm": 2.0128400325775146,
|
| 46725 |
+
"learning_rate": 5.235817231863405e-07,
|
| 46726 |
+
"loss": 0.0618,
|
| 46727 |
+
"step": 6146
|
| 46728 |
+
},
|
| 46729 |
+
{
|
| 46730 |
+
"epoch": 0.9069715971966065,
|
| 46731 |
+
"grad_norm": 1.971063256263733,
|
| 46732 |
+
"learning_rate": 5.219380860263168e-07,
|
| 46733 |
+
"loss": 0.0684,
|
| 46734 |
+
"step": 6147
|
| 46735 |
+
},
|
| 46736 |
+
{
|
| 46737 |
+
"epoch": 0.9071191442272224,
|
| 46738 |
+
"grad_norm": 1.8959208726882935,
|
| 46739 |
+
"learning_rate": 5.20296963657948e-07,
|
| 46740 |
+
"loss": 0.0111,
|
| 46741 |
+
"step": 6148
|
| 46742 |
+
},
|
| 46743 |
+
{
|
| 46744 |
+
"epoch": 0.9072666912578384,
|
| 46745 |
+
"grad_norm": 4.560550212860107,
|
| 46746 |
+
"learning_rate": 5.186583565166692e-07,
|
| 46747 |
+
"loss": 0.1139,
|
| 46748 |
+
"step": 6149
|
| 46749 |
+
},
|
| 46750 |
+
{
|
| 46751 |
+
"epoch": 0.9074142382884545,
|
| 46752 |
+
"grad_norm": 2.025960922241211,
|
| 46753 |
+
"learning_rate": 5.17022265037247e-07,
|
| 46754 |
+
"loss": 0.051,
|
| 46755 |
+
"step": 6150
|
| 46756 |
+
},
|
| 46757 |
+
{
|
| 46758 |
+
"epoch": 0.9075617853190705,
|
| 46759 |
+
"grad_norm": 2.0765039920806885,
|
| 46760 |
+
"learning_rate": 5.153886896537829e-07,
|
| 46761 |
+
"loss": 0.0616,
|
| 46762 |
+
"step": 6151
|
| 46763 |
+
},
|
| 46764 |
+
{
|
| 46765 |
+
"epoch": 0.9077093323496864,
|
| 46766 |
+
"grad_norm": 1.6943057775497437,
|
| 46767 |
+
"learning_rate": 5.137576307997083e-07,
|
| 46768 |
+
"loss": 0.0418,
|
| 46769 |
+
"step": 6152
|
| 46770 |
+
},
|
| 46771 |
+
{
|
| 46772 |
+
"epoch": 0.9078568793803025,
|
| 46773 |
+
"grad_norm": 3.1802217960357666,
|
| 46774 |
+
"learning_rate": 5.121290889077879e-07,
|
| 46775 |
+
"loss": 0.0782,
|
| 46776 |
+
"step": 6153
|
| 46777 |
+
},
|
| 46778 |
+
{
|
| 46779 |
+
"epoch": 0.9080044264109185,
|
| 46780 |
+
"grad_norm": 5.109692573547363,
|
| 46781 |
+
"learning_rate": 5.105030644101206e-07,
|
| 46782 |
+
"loss": 0.0681,
|
| 46783 |
+
"step": 6154
|
| 46784 |
+
},
|
| 46785 |
+
{
|
| 46786 |
+
"epoch": 0.9081519734415345,
|
| 46787 |
+
"grad_norm": 2.058464527130127,
|
| 46788 |
+
"learning_rate": 5.088795577381356e-07,
|
| 46789 |
+
"loss": 0.0458,
|
| 46790 |
+
"step": 6155
|
| 46791 |
+
},
|
| 46792 |
+
{
|
| 46793 |
+
"epoch": 0.9082995204721505,
|
| 46794 |
+
"grad_norm": 2.1350038051605225,
|
| 46795 |
+
"learning_rate": 5.072585693225918e-07,
|
| 46796 |
+
"loss": 0.0465,
|
| 46797 |
+
"step": 6156
|
| 46798 |
+
},
|
| 46799 |
+
{
|
| 46800 |
+
"epoch": 0.9084470675027665,
|
| 46801 |
+
"grad_norm": 0.7591288685798645,
|
| 46802 |
+
"learning_rate": 5.056400995935829e-07,
|
| 46803 |
+
"loss": 0.018,
|
| 46804 |
+
"step": 6157
|
| 46805 |
+
},
|
| 46806 |
+
{
|
| 46807 |
+
"epoch": 0.9085946145333825,
|
| 46808 |
+
"grad_norm": 4.6449761390686035,
|
| 46809 |
+
"learning_rate": 5.040241489805365e-07,
|
| 46810 |
+
"loss": 0.1426,
|
| 46811 |
+
"step": 6158
|
| 46812 |
+
},
|
| 46813 |
+
{
|
| 46814 |
+
"epoch": 0.9087421615639986,
|
| 46815 |
+
"grad_norm": 6.209170341491699,
|
| 46816 |
+
"learning_rate": 5.024107179122051e-07,
|
| 46817 |
+
"loss": 0.164,
|
| 46818 |
+
"step": 6159
|
| 46819 |
+
},
|
| 46820 |
+
{
|
| 46821 |
+
"epoch": 0.9088897085946145,
|
| 46822 |
+
"grad_norm": 2.090540647506714,
|
| 46823 |
+
"learning_rate": 5.007998068166786e-07,
|
| 46824 |
+
"loss": 0.0175,
|
| 46825 |
+
"step": 6160
|
| 46826 |
+
},
|
| 46827 |
+
{
|
| 46828 |
+
"epoch": 0.9088897085946145,
|
| 46829 |
+
"eval_accuracy": 0.9782923299565847,
|
| 46830 |
+
"eval_f1": 0.9629629629629629,
|
| 46831 |
+
"eval_loss": 0.05504719540476799,
|
| 46832 |
+
"eval_precision": 0.9798994974874372,
|
| 46833 |
+
"eval_recall": 0.9466019417475728,
|
| 46834 |
+
"eval_runtime": 49.3101,
|
| 46835 |
+
"eval_samples_per_second": 5.901,
|
| 46836 |
+
"eval_steps_per_second": 0.203,
|
| 46837 |
+
"step": 6160
|
| 46838 |
+
},
|
| 46839 |
+
{
|
| 46840 |
+
"epoch": 0.9090372556252305,
|
| 46841 |
+
"grad_norm": 3.121851682662964,
|
| 46842 |
+
"learning_rate": 4.991914161213751e-07,
|
| 46843 |
+
"loss": 0.032,
|
| 46844 |
+
"step": 6161
|
| 46845 |
+
},
|
| 46846 |
+
{
|
| 46847 |
+
"epoch": 0.9091848026558466,
|
| 46848 |
+
"grad_norm": 0.8654899597167969,
|
| 46849 |
+
"learning_rate": 4.975855462530465e-07,
|
| 46850 |
+
"loss": 0.0127,
|
| 46851 |
+
"step": 6162
|
| 46852 |
+
},
|
| 46853 |
+
{
|
| 46854 |
+
"epoch": 0.9093323496864626,
|
| 46855 |
+
"grad_norm": 1.8461565971374512,
|
| 46856 |
+
"learning_rate": 4.959821976377743e-07,
|
| 46857 |
+
"loss": 0.0472,
|
| 46858 |
+
"step": 6163
|
| 46859 |
+
},
|
| 46860 |
+
{
|
| 46861 |
+
"epoch": 0.9094798967170785,
|
| 46862 |
+
"grad_norm": 1.1268733739852905,
|
| 46863 |
+
"learning_rate": 4.943813707009693e-07,
|
| 46864 |
+
"loss": 0.0343,
|
| 46865 |
+
"step": 6164
|
| 46866 |
+
},
|
| 46867 |
+
{
|
| 46868 |
+
"epoch": 0.9096274437476946,
|
| 46869 |
+
"grad_norm": 2.749328374862671,
|
| 46870 |
+
"learning_rate": 4.927830658673771e-07,
|
| 46871 |
+
"loss": 0.1282,
|
| 46872 |
+
"step": 6165
|
| 46873 |
+
},
|
| 46874 |
+
{
|
| 46875 |
+
"epoch": 0.9097749907783106,
|
| 46876 |
+
"grad_norm": 1.882821798324585,
|
| 46877 |
+
"learning_rate": 4.911872835610721e-07,
|
| 46878 |
+
"loss": 0.0469,
|
| 46879 |
+
"step": 6166
|
| 46880 |
+
},
|
| 46881 |
+
{
|
| 46882 |
+
"epoch": 0.9099225378089266,
|
| 46883 |
+
"grad_norm": 2.297895669937134,
|
| 46884 |
+
"learning_rate": 4.895940242054564e-07,
|
| 46885 |
+
"loss": 0.0466,
|
| 46886 |
+
"step": 6167
|
| 46887 |
+
},
|
| 46888 |
+
{
|
| 46889 |
+
"epoch": 0.9100700848395427,
|
| 46890 |
+
"grad_norm": 2.072247266769409,
|
| 46891 |
+
"learning_rate": 4.880032882232699e-07,
|
| 46892 |
+
"loss": 0.0569,
|
| 46893 |
+
"step": 6168
|
| 46894 |
+
},
|
| 46895 |
+
{
|
| 46896 |
+
"epoch": 0.9102176318701586,
|
| 46897 |
+
"grad_norm": 3.2294955253601074,
|
| 46898 |
+
"learning_rate": 4.864150760365771e-07,
|
| 46899 |
+
"loss": 0.1044,
|
| 46900 |
+
"step": 6169
|
| 46901 |
+
},
|
| 46902 |
+
{
|
| 46903 |
+
"epoch": 0.9103651789007746,
|
| 46904 |
+
"grad_norm": 1.4208635091781616,
|
| 46905 |
+
"learning_rate": 4.848293880667732e-07,
|
| 46906 |
+
"loss": 0.0203,
|
| 46907 |
+
"step": 6170
|
| 46908 |
+
},
|
| 46909 |
+
{
|
| 46910 |
+
"epoch": 0.9105127259313907,
|
| 46911 |
+
"grad_norm": 3.2182838916778564,
|
| 46912 |
+
"learning_rate": 4.83246224734587e-07,
|
| 46913 |
+
"loss": 0.0434,
|
| 46914 |
+
"step": 6171
|
| 46915 |
+
},
|
| 46916 |
+
{
|
| 46917 |
+
"epoch": 0.9106602729620067,
|
| 46918 |
+
"grad_norm": 3.255988359451294,
|
| 46919 |
+
"learning_rate": 4.81665586460075e-07,
|
| 46920 |
+
"loss": 0.0959,
|
| 46921 |
+
"step": 6172
|
| 46922 |
+
},
|
| 46923 |
+
{
|
| 46924 |
+
"epoch": 0.9108078199926226,
|
| 46925 |
+
"grad_norm": 4.523223876953125,
|
| 46926 |
+
"learning_rate": 4.800874736626226e-07,
|
| 46927 |
+
"loss": 0.0774,
|
| 46928 |
+
"step": 6173
|
| 46929 |
+
},
|
| 46930 |
+
{
|
| 46931 |
+
"epoch": 0.9109553670232386,
|
| 46932 |
+
"grad_norm": 1.8783808946609497,
|
| 46933 |
+
"learning_rate": 4.785118867609507e-07,
|
| 46934 |
+
"loss": 0.0606,
|
| 46935 |
+
"step": 6174
|
| 46936 |
+
},
|
| 46937 |
+
{
|
| 46938 |
+
"epoch": 0.9111029140538547,
|
| 46939 |
+
"grad_norm": 2.5650320053100586,
|
| 46940 |
+
"learning_rate": 4.769388261731012e-07,
|
| 46941 |
+
"loss": 0.0939,
|
| 46942 |
+
"step": 6175
|
| 46943 |
+
},
|
| 46944 |
+
{
|
| 46945 |
+
"epoch": 0.9112504610844707,
|
| 46946 |
+
"grad_norm": 5.402647495269775,
|
| 46947 |
+
"learning_rate": 4.7536829231645156e-07,
|
| 46948 |
+
"loss": 0.0886,
|
| 46949 |
+
"step": 6176
|
| 46950 |
+
},
|
| 46951 |
+
{
|
| 46952 |
+
"epoch": 0.9113980081150866,
|
| 46953 |
+
"grad_norm": 3.0229949951171875,
|
| 46954 |
+
"learning_rate": 4.738002856077117e-07,
|
| 46955 |
+
"loss": 0.0827,
|
| 46956 |
+
"step": 6177
|
| 46957 |
+
},
|
| 46958 |
+
{
|
| 46959 |
+
"epoch": 0.9115455551457027,
|
| 46960 |
+
"grad_norm": 1.8020105361938477,
|
| 46961 |
+
"learning_rate": 4.722348064629123e-07,
|
| 46962 |
+
"loss": 0.0423,
|
| 46963 |
+
"step": 6178
|
| 46964 |
+
},
|
| 46965 |
+
{
|
| 46966 |
+
"epoch": 0.9116931021763187,
|
| 46967 |
+
"grad_norm": 1.6765297651290894,
|
| 46968 |
+
"learning_rate": 4.706718552974221e-07,
|
| 46969 |
+
"loss": 0.0494,
|
| 46970 |
+
"step": 6179
|
| 46971 |
+
},
|
| 46972 |
+
{
|
| 46973 |
+
"epoch": 0.9118406492069348,
|
| 46974 |
+
"grad_norm": 1.5524864196777344,
|
| 46975 |
+
"learning_rate": 4.691114325259327e-07,
|
| 46976 |
+
"loss": 0.0459,
|
| 46977 |
+
"step": 6180
|
| 46978 |
+
},
|
| 46979 |
+
{
|
| 46980 |
+
"epoch": 0.9118406492069348,
|
| 46981 |
+
"eval_accuracy": 0.9782923299565847,
|
| 46982 |
+
"eval_f1": 0.9629629629629629,
|
| 46983 |
+
"eval_loss": 0.05488729849457741,
|
| 46984 |
+
"eval_precision": 0.9798994974874372,
|
| 46985 |
+
"eval_recall": 0.9466019417475728,
|
| 46986 |
+
"eval_runtime": 51.2558,
|
| 46987 |
+
"eval_samples_per_second": 5.677,
|
| 46988 |
+
"eval_steps_per_second": 0.195,
|
| 46989 |
+
"step": 6180
|
| 46990 |
+
},
|
| 46991 |
+
{
|
| 46992 |
+
"epoch": 0.9119881962375507,
|
| 46993 |
+
"grad_norm": 1.8164972066879272,
|
| 46994 |
+
"learning_rate": 4.6755353856246635e-07,
|
| 46995 |
+
"loss": 0.0531,
|
| 46996 |
+
"step": 6181
|
| 46997 |
+
},
|
| 46998 |
+
{
|
| 46999 |
+
"epoch": 0.9121357432681667,
|
| 47000 |
+
"grad_norm": 2.300924301147461,
|
| 47001 |
+
"learning_rate": 4.6599817382037895e-07,
|
| 47002 |
+
"loss": 0.0215,
|
| 47003 |
+
"step": 6182
|
| 47004 |
+
},
|
| 47005 |
+
{
|
| 47006 |
+
"epoch": 0.9122832902987827,
|
| 47007 |
+
"grad_norm": 1.35698664188385,
|
| 47008 |
+
"learning_rate": 4.644453387123504e-07,
|
| 47009 |
+
"loss": 0.0215,
|
| 47010 |
+
"step": 6183
|
| 47011 |
+
},
|
| 47012 |
+
{
|
| 47013 |
+
"epoch": 0.9124308373293988,
|
| 47014 |
+
"grad_norm": 2.6017749309539795,
|
| 47015 |
+
"learning_rate": 4.6289503365038904e-07,
|
| 47016 |
+
"loss": 0.0511,
|
| 47017 |
+
"step": 6184
|
| 47018 |
+
},
|
| 47019 |
+
{
|
| 47020 |
+
"epoch": 0.9125783843600147,
|
| 47021 |
+
"grad_norm": 0.7743318676948547,
|
| 47022 |
+
"learning_rate": 4.6134725904583565e-07,
|
| 47023 |
+
"loss": 0.0137,
|
| 47024 |
+
"step": 6185
|
| 47025 |
+
},
|
| 47026 |
+
{
|
| 47027 |
+
"epoch": 0.9127259313906307,
|
| 47028 |
+
"grad_norm": 2.4677441120147705,
|
| 47029 |
+
"learning_rate": 4.598020153093552e-07,
|
| 47030 |
+
"loss": 0.0779,
|
| 47031 |
+
"step": 6186
|
| 47032 |
+
},
|
| 47033 |
+
{
|
| 47034 |
+
"epoch": 0.9128734784212468,
|
| 47035 |
+
"grad_norm": 2.9131996631622314,
|
| 47036 |
+
"learning_rate": 4.582593028509452e-07,
|
| 47037 |
+
"loss": 0.062,
|
| 47038 |
+
"step": 6187
|
| 47039 |
+
},
|
| 47040 |
+
{
|
| 47041 |
+
"epoch": 0.9130210254518628,
|
| 47042 |
+
"grad_norm": 1.517683982849121,
|
| 47043 |
+
"learning_rate": 4.567191220799305e-07,
|
| 47044 |
+
"loss": 0.0688,
|
| 47045 |
+
"step": 6188
|
| 47046 |
+
},
|
| 47047 |
+
{
|
| 47048 |
+
"epoch": 0.9131685724824787,
|
| 47049 |
+
"grad_norm": 3.0885980129241943,
|
| 47050 |
+
"learning_rate": 4.55181473404962e-07,
|
| 47051 |
+
"loss": 0.0986,
|
| 47052 |
+
"step": 6189
|
| 47053 |
+
},
|
| 47054 |
+
{
|
| 47055 |
+
"epoch": 0.9133161195130948,
|
| 47056 |
+
"grad_norm": 1.978442907333374,
|
| 47057 |
+
"learning_rate": 4.536463572340222e-07,
|
| 47058 |
+
"loss": 0.0489,
|
| 47059 |
+
"step": 6190
|
| 47060 |
+
},
|
| 47061 |
+
{
|
| 47062 |
+
"epoch": 0.9134636665437108,
|
| 47063 |
+
"grad_norm": 1.8119323253631592,
|
| 47064 |
+
"learning_rate": 4.5211377397441857e-07,
|
| 47065 |
+
"loss": 0.0523,
|
| 47066 |
+
"step": 6191
|
| 47067 |
+
},
|
| 47068 |
+
{
|
| 47069 |
+
"epoch": 0.9136112135743268,
|
| 47070 |
+
"grad_norm": 0.8473356366157532,
|
| 47071 |
+
"learning_rate": 4.505837240327882e-07,
|
| 47072 |
+
"loss": 0.0098,
|
| 47073 |
+
"step": 6192
|
| 47074 |
+
},
|
| 47075 |
+
{
|
| 47076 |
+
"epoch": 0.9137587606049429,
|
| 47077 |
+
"grad_norm": 0.7812674045562744,
|
| 47078 |
+
"learning_rate": 4.490562078150962e-07,
|
| 47079 |
+
"loss": 0.0172,
|
| 47080 |
+
"step": 6193
|
| 47081 |
+
},
|
| 47082 |
+
{
|
| 47083 |
+
"epoch": 0.9139063076355588,
|
| 47084 |
+
"grad_norm": 2.3568999767303467,
|
| 47085 |
+
"learning_rate": 4.4753122572663397e-07,
|
| 47086 |
+
"loss": 0.0394,
|
| 47087 |
+
"step": 6194
|
| 47088 |
+
},
|
| 47089 |
+
{
|
| 47090 |
+
"epoch": 0.9140538546661748,
|
| 47091 |
+
"grad_norm": 4.555817604064941,
|
| 47092 |
+
"learning_rate": 4.460087781720179e-07,
|
| 47093 |
+
"loss": 0.089,
|
| 47094 |
+
"step": 6195
|
| 47095 |
+
},
|
| 47096 |
+
{
|
| 47097 |
+
"epoch": 0.9142014016967909,
|
| 47098 |
+
"grad_norm": 1.3016505241394043,
|
| 47099 |
+
"learning_rate": 4.4448886555520266e-07,
|
| 47100 |
+
"loss": 0.0215,
|
| 47101 |
+
"step": 6196
|
| 47102 |
+
},
|
| 47103 |
+
{
|
| 47104 |
+
"epoch": 0.9143489487274069,
|
| 47105 |
+
"grad_norm": 1.2897732257843018,
|
| 47106 |
+
"learning_rate": 4.4297148827946e-07,
|
| 47107 |
+
"loss": 0.0215,
|
| 47108 |
+
"step": 6197
|
| 47109 |
+
},
|
| 47110 |
+
{
|
| 47111 |
+
"epoch": 0.9144964957580228,
|
| 47112 |
+
"grad_norm": 2.542720079421997,
|
| 47113 |
+
"learning_rate": 4.414566467473891e-07,
|
| 47114 |
+
"loss": 0.026,
|
| 47115 |
+
"step": 6198
|
| 47116 |
+
},
|
| 47117 |
+
{
|
| 47118 |
+
"epoch": 0.9146440427886389,
|
| 47119 |
+
"grad_norm": 1.3125649690628052,
|
| 47120 |
+
"learning_rate": 4.399443413609228e-07,
|
| 47121 |
+
"loss": 0.038,
|
| 47122 |
+
"step": 6199
|
| 47123 |
+
},
|
| 47124 |
+
{
|
| 47125 |
+
"epoch": 0.9147915898192549,
|
| 47126 |
+
"grad_norm": 2.4134104251861572,
|
| 47127 |
+
"learning_rate": 4.384345725213157e-07,
|
| 47128 |
+
"loss": 0.0631,
|
| 47129 |
+
"step": 6200
|
| 47130 |
+
},
|
| 47131 |
+
{
|
| 47132 |
+
"epoch": 0.9147915898192549,
|
| 47133 |
+
"eval_accuracy": 0.9782923299565847,
|
| 47134 |
+
"eval_f1": 0.9629629629629629,
|
| 47135 |
+
"eval_loss": 0.055400192737579346,
|
| 47136 |
+
"eval_precision": 0.9798994974874372,
|
| 47137 |
+
"eval_recall": 0.9466019417475728,
|
| 47138 |
+
"eval_runtime": 49.8115,
|
| 47139 |
+
"eval_samples_per_second": 5.842,
|
| 47140 |
+
"eval_steps_per_second": 0.201,
|
| 47141 |
+
"step": 6200
|
| 47142 |
}
|
| 47143 |
],
|
| 47144 |
"logging_steps": 1,
|
|
|
|
| 47158 |
"attributes": {}
|
| 47159 |
}
|
| 47160 |
},
|
| 47161 |
+
"total_flos": 1.9099781569372488e+18,
|
| 47162 |
"train_batch_size": 8,
|
| 47163 |
"trial_name": null,
|
| 47164 |
"trial_params": null
|