Fanucci
commited on
Training in progress, step 1800, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 671149168
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d795df17d9b7fe8c9ac8d2362ea4860ca39ed8d52b0fbe5bf336a1f2fa317e77
|
| 3 |
size 671149168
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1342555602
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b4e3c4bbe94ecbf3d0950b966656e61c8ea22abd435d84c44e0d7be41d57eaec
|
| 3 |
size 1342555602
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7db7842f0176405e8da957b45ced6fc4f7e8100050b6e7dd0da125f17bacf473
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fb7b29af2cae40a0036f379eb1aaba9d4a094815aa5e68be1db2734f14bd9fbd
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 3.1351470947265625,
|
| 3 |
"best_model_checkpoint": "miner_id_24/checkpoint-600",
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 200,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -11279,6 +11279,1414 @@
|
|
| 11279 |
"eval_samples_per_second": 5.624,
|
| 11280 |
"eval_steps_per_second": 1.406,
|
| 11281 |
"step": 1600
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11282 |
}
|
| 11283 |
],
|
| 11284 |
"logging_steps": 1,
|
|
@@ -11293,7 +12701,7 @@
|
|
| 11293 |
"early_stopping_threshold": 0.0
|
| 11294 |
},
|
| 11295 |
"attributes": {
|
| 11296 |
-
"early_stopping_patience_counter":
|
| 11297 |
}
|
| 11298 |
},
|
| 11299 |
"TrainerControl": {
|
|
@@ -11302,12 +12710,12 @@
|
|
| 11302 |
"should_evaluate": false,
|
| 11303 |
"should_log": false,
|
| 11304 |
"should_save": true,
|
| 11305 |
-
"should_training_stop":
|
| 11306 |
},
|
| 11307 |
"attributes": {}
|
| 11308 |
}
|
| 11309 |
},
|
| 11310 |
-
"total_flos": 1.
|
| 11311 |
"train_batch_size": 4,
|
| 11312 |
"trial_name": null,
|
| 11313 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 3.1351470947265625,
|
| 3 |
"best_model_checkpoint": "miner_id_24/checkpoint-600",
|
| 4 |
+
"epoch": 0.5759078547432411,
|
| 5 |
"eval_steps": 200,
|
| 6 |
+
"global_step": 1800,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 11279 |
"eval_samples_per_second": 5.624,
|
| 11280 |
"eval_steps_per_second": 1.406,
|
| 11281 |
"step": 1600
|
| 11282 |
+
},
|
| 11283 |
+
{
|
| 11284 |
+
"epoch": 0.5122380419132939,
|
| 11285 |
+
"grad_norm": 56.0380744934082,
|
| 11286 |
+
"learning_rate": 0.00019709336586884495,
|
| 11287 |
+
"loss": 6.848,
|
| 11288 |
+
"step": 1601
|
| 11289 |
+
},
|
| 11290 |
+
{
|
| 11291 |
+
"epoch": 0.5125579907214846,
|
| 11292 |
+
"grad_norm": 299.5946044921875,
|
| 11293 |
+
"learning_rate": 0.00019689811903639273,
|
| 11294 |
+
"loss": 6.3987,
|
| 11295 |
+
"step": 1602
|
| 11296 |
+
},
|
| 11297 |
+
{
|
| 11298 |
+
"epoch": 0.5128779395296752,
|
| 11299 |
+
"grad_norm": 10.880533218383789,
|
| 11300 |
+
"learning_rate": 0.00019670288610454474,
|
| 11301 |
+
"loss": 6.7342,
|
| 11302 |
+
"step": 1603
|
| 11303 |
+
},
|
| 11304 |
+
{
|
| 11305 |
+
"epoch": 0.513197888337866,
|
| 11306 |
+
"grad_norm": 355.7524719238281,
|
| 11307 |
+
"learning_rate": 0.0001965076672804357,
|
| 11308 |
+
"loss": 6.8169,
|
| 11309 |
+
"step": 1604
|
| 11310 |
+
},
|
| 11311 |
+
{
|
| 11312 |
+
"epoch": 0.5135178371460566,
|
| 11313 |
+
"grad_norm": 2261.9794921875,
|
| 11314 |
+
"learning_rate": 0.00019631246277118555,
|
| 11315 |
+
"loss": 6.3585,
|
| 11316 |
+
"step": 1605
|
| 11317 |
+
},
|
| 11318 |
+
{
|
| 11319 |
+
"epoch": 0.5138377859542473,
|
| 11320 |
+
"grad_norm": 1438.713134765625,
|
| 11321 |
+
"learning_rate": 0.00019611727278389898,
|
| 11322 |
+
"loss": 6.8331,
|
| 11323 |
+
"step": 1606
|
| 11324 |
+
},
|
| 11325 |
+
{
|
| 11326 |
+
"epoch": 0.514157734762438,
|
| 11327 |
+
"grad_norm": 745.37744140625,
|
| 11328 |
+
"learning_rate": 0.00019592209752566513,
|
| 11329 |
+
"loss": 6.8245,
|
| 11330 |
+
"step": 1607
|
| 11331 |
+
},
|
| 11332 |
+
{
|
| 11333 |
+
"epoch": 0.5144776835706287,
|
| 11334 |
+
"grad_norm": 58.433902740478516,
|
| 11335 |
+
"learning_rate": 0.0001957269372035578,
|
| 11336 |
+
"loss": 6.8176,
|
| 11337 |
+
"step": 1608
|
| 11338 |
+
},
|
| 11339 |
+
{
|
| 11340 |
+
"epoch": 0.5147976323788194,
|
| 11341 |
+
"grad_norm": 17.290542602539062,
|
| 11342 |
+
"learning_rate": 0.00019553179202463453,
|
| 11343 |
+
"loss": 6.855,
|
| 11344 |
+
"step": 1609
|
| 11345 |
+
},
|
| 11346 |
+
{
|
| 11347 |
+
"epoch": 0.5151175811870101,
|
| 11348 |
+
"grad_norm": 33.13834762573242,
|
| 11349 |
+
"learning_rate": 0.0001953366621959372,
|
| 11350 |
+
"loss": 6.8946,
|
| 11351 |
+
"step": 1610
|
| 11352 |
+
},
|
| 11353 |
+
{
|
| 11354 |
+
"epoch": 0.5154375299952008,
|
| 11355 |
+
"grad_norm": 18.5881404876709,
|
| 11356 |
+
"learning_rate": 0.00019514154792449125,
|
| 11357 |
+
"loss": 6.5586,
|
| 11358 |
+
"step": 1611
|
| 11359 |
+
},
|
| 11360 |
+
{
|
| 11361 |
+
"epoch": 0.5157574788033915,
|
| 11362 |
+
"grad_norm": 1200.3309326171875,
|
| 11363 |
+
"learning_rate": 0.00019494644941730547,
|
| 11364 |
+
"loss": 6.5655,
|
| 11365 |
+
"step": 1612
|
| 11366 |
+
},
|
| 11367 |
+
{
|
| 11368 |
+
"epoch": 0.5160774276115822,
|
| 11369 |
+
"grad_norm": 13.903018951416016,
|
| 11370 |
+
"learning_rate": 0.00019475136688137219,
|
| 11371 |
+
"loss": 6.877,
|
| 11372 |
+
"step": 1613
|
| 11373 |
+
},
|
| 11374 |
+
{
|
| 11375 |
+
"epoch": 0.5163973764197728,
|
| 11376 |
+
"grad_norm": 208.99990844726562,
|
| 11377 |
+
"learning_rate": 0.00019455630052366666,
|
| 11378 |
+
"loss": 6.798,
|
| 11379 |
+
"step": 1614
|
| 11380 |
+
},
|
| 11381 |
+
{
|
| 11382 |
+
"epoch": 0.5167173252279635,
|
| 11383 |
+
"grad_norm": 31.752573013305664,
|
| 11384 |
+
"learning_rate": 0.00019436125055114688,
|
| 11385 |
+
"loss": 6.5439,
|
| 11386 |
+
"step": 1615
|
| 11387 |
+
},
|
| 11388 |
+
{
|
| 11389 |
+
"epoch": 0.5170372740361542,
|
| 11390 |
+
"grad_norm": 2381.63525390625,
|
| 11391 |
+
"learning_rate": 0.00019416621717075356,
|
| 11392 |
+
"loss": 6.5238,
|
| 11393 |
+
"step": 1616
|
| 11394 |
+
},
|
| 11395 |
+
{
|
| 11396 |
+
"epoch": 0.5173572228443449,
|
| 11397 |
+
"grad_norm": 2820.570556640625,
|
| 11398 |
+
"learning_rate": 0.0001939712005894099,
|
| 11399 |
+
"loss": 6.6507,
|
| 11400 |
+
"step": 1617
|
| 11401 |
+
},
|
| 11402 |
+
{
|
| 11403 |
+
"epoch": 0.5176771716525356,
|
| 11404 |
+
"grad_norm": 2531.220947265625,
|
| 11405 |
+
"learning_rate": 0.00019377620101402113,
|
| 11406 |
+
"loss": 6.8637,
|
| 11407 |
+
"step": 1618
|
| 11408 |
+
},
|
| 11409 |
+
{
|
| 11410 |
+
"epoch": 0.5179971204607263,
|
| 11411 |
+
"grad_norm": 14558.5615234375,
|
| 11412 |
+
"learning_rate": 0.00019358121865147446,
|
| 11413 |
+
"loss": 6.5084,
|
| 11414 |
+
"step": 1619
|
| 11415 |
+
},
|
| 11416 |
+
{
|
| 11417 |
+
"epoch": 0.518317069268917,
|
| 11418 |
+
"grad_norm": 119.33515167236328,
|
| 11419 |
+
"learning_rate": 0.00019338625370863882,
|
| 11420 |
+
"loss": 6.6764,
|
| 11421 |
+
"step": 1620
|
| 11422 |
+
},
|
| 11423 |
+
{
|
| 11424 |
+
"epoch": 0.5186370180771077,
|
| 11425 |
+
"grad_norm": 4097.06103515625,
|
| 11426 |
+
"learning_rate": 0.00019319130639236477,
|
| 11427 |
+
"loss": 6.5498,
|
| 11428 |
+
"step": 1621
|
| 11429 |
+
},
|
| 11430 |
+
{
|
| 11431 |
+
"epoch": 0.5189569668852984,
|
| 11432 |
+
"grad_norm": 118.87272644042969,
|
| 11433 |
+
"learning_rate": 0.00019299637690948414,
|
| 11434 |
+
"loss": 6.7413,
|
| 11435 |
+
"step": 1622
|
| 11436 |
+
},
|
| 11437 |
+
{
|
| 11438 |
+
"epoch": 0.519276915693489,
|
| 11439 |
+
"grad_norm": 319.0539245605469,
|
| 11440 |
+
"learning_rate": 0.0001928014654668096,
|
| 11441 |
+
"loss": 6.8565,
|
| 11442 |
+
"step": 1623
|
| 11443 |
+
},
|
| 11444 |
+
{
|
| 11445 |
+
"epoch": 0.5195968645016797,
|
| 11446 |
+
"grad_norm": 1015.8532104492188,
|
| 11447 |
+
"learning_rate": 0.00019260657227113513,
|
| 11448 |
+
"loss": 6.9909,
|
| 11449 |
+
"step": 1624
|
| 11450 |
+
},
|
| 11451 |
+
{
|
| 11452 |
+
"epoch": 0.5199168133098704,
|
| 11453 |
+
"grad_norm": 6110.1357421875,
|
| 11454 |
+
"learning_rate": 0.000192411697529235,
|
| 11455 |
+
"loss": 6.6244,
|
| 11456 |
+
"step": 1625
|
| 11457 |
+
},
|
| 11458 |
+
{
|
| 11459 |
+
"epoch": 0.5202367621180611,
|
| 11460 |
+
"grad_norm": 920.1796264648438,
|
| 11461 |
+
"learning_rate": 0.0001922168414478639,
|
| 11462 |
+
"loss": 6.6993,
|
| 11463 |
+
"step": 1626
|
| 11464 |
+
},
|
| 11465 |
+
{
|
| 11466 |
+
"epoch": 0.5205567109262518,
|
| 11467 |
+
"grad_norm": 1895.49169921875,
|
| 11468 |
+
"learning_rate": 0.00019202200423375695,
|
| 11469 |
+
"loss": 6.7248,
|
| 11470 |
+
"step": 1627
|
| 11471 |
+
},
|
| 11472 |
+
{
|
| 11473 |
+
"epoch": 0.5208766597344425,
|
| 11474 |
+
"grad_norm": 190.3346710205078,
|
| 11475 |
+
"learning_rate": 0.00019182718609362913,
|
| 11476 |
+
"loss": 6.6856,
|
| 11477 |
+
"step": 1628
|
| 11478 |
+
},
|
| 11479 |
+
{
|
| 11480 |
+
"epoch": 0.5211966085426332,
|
| 11481 |
+
"grad_norm": 1900.810302734375,
|
| 11482 |
+
"learning_rate": 0.0001916323872341751,
|
| 11483 |
+
"loss": 6.4773,
|
| 11484 |
+
"step": 1629
|
| 11485 |
+
},
|
| 11486 |
+
{
|
| 11487 |
+
"epoch": 0.5215165573508239,
|
| 11488 |
+
"grad_norm": 372.069091796875,
|
| 11489 |
+
"learning_rate": 0.00019143760786206922,
|
| 11490 |
+
"loss": 6.8977,
|
| 11491 |
+
"step": 1630
|
| 11492 |
+
},
|
| 11493 |
+
{
|
| 11494 |
+
"epoch": 0.5218365061590146,
|
| 11495 |
+
"grad_norm": 470.37896728515625,
|
| 11496 |
+
"learning_rate": 0.00019124284818396498,
|
| 11497 |
+
"loss": 6.7348,
|
| 11498 |
+
"step": 1631
|
| 11499 |
+
},
|
| 11500 |
+
{
|
| 11501 |
+
"epoch": 0.5221564549672052,
|
| 11502 |
+
"grad_norm": 345.2624206542969,
|
| 11503 |
+
"learning_rate": 0.00019104810840649518,
|
| 11504 |
+
"loss": 6.5007,
|
| 11505 |
+
"step": 1632
|
| 11506 |
+
},
|
| 11507 |
+
{
|
| 11508 |
+
"epoch": 0.522476403775396,
|
| 11509 |
+
"grad_norm": 14990.578125,
|
| 11510 |
+
"learning_rate": 0.0001908533887362715,
|
| 11511 |
+
"loss": 6.7623,
|
| 11512 |
+
"step": 1633
|
| 11513 |
+
},
|
| 11514 |
+
{
|
| 11515 |
+
"epoch": 0.5227963525835866,
|
| 11516 |
+
"grad_norm": 8434.7958984375,
|
| 11517 |
+
"learning_rate": 0.00019065868937988398,
|
| 11518 |
+
"loss": 6.4984,
|
| 11519 |
+
"step": 1634
|
| 11520 |
+
},
|
| 11521 |
+
{
|
| 11522 |
+
"epoch": 0.5231163013917773,
|
| 11523 |
+
"grad_norm": 393.5131530761719,
|
| 11524 |
+
"learning_rate": 0.0001904640105439015,
|
| 11525 |
+
"loss": 6.7975,
|
| 11526 |
+
"step": 1635
|
| 11527 |
+
},
|
| 11528 |
+
{
|
| 11529 |
+
"epoch": 0.523436250199968,
|
| 11530 |
+
"grad_norm": 68.34281921386719,
|
| 11531 |
+
"learning_rate": 0.00019026935243487105,
|
| 11532 |
+
"loss": 6.8138,
|
| 11533 |
+
"step": 1636
|
| 11534 |
+
},
|
| 11535 |
+
{
|
| 11536 |
+
"epoch": 0.5237561990081587,
|
| 11537 |
+
"grad_norm": 2655.11767578125,
|
| 11538 |
+
"learning_rate": 0.00019007471525931736,
|
| 11539 |
+
"loss": 6.7472,
|
| 11540 |
+
"step": 1637
|
| 11541 |
+
},
|
| 11542 |
+
{
|
| 11543 |
+
"epoch": 0.5240761478163494,
|
| 11544 |
+
"grad_norm": 271.6803894042969,
|
| 11545 |
+
"learning_rate": 0.00018988009922374336,
|
| 11546 |
+
"loss": 6.7314,
|
| 11547 |
+
"step": 1638
|
| 11548 |
+
},
|
| 11549 |
+
{
|
| 11550 |
+
"epoch": 0.5243960966245401,
|
| 11551 |
+
"grad_norm": 49.694091796875,
|
| 11552 |
+
"learning_rate": 0.00018968550453462945,
|
| 11553 |
+
"loss": 6.5887,
|
| 11554 |
+
"step": 1639
|
| 11555 |
+
},
|
| 11556 |
+
{
|
| 11557 |
+
"epoch": 0.5247160454327308,
|
| 11558 |
+
"grad_norm": 40.31327819824219,
|
| 11559 |
+
"learning_rate": 0.00018949093139843294,
|
| 11560 |
+
"loss": 6.5703,
|
| 11561 |
+
"step": 1640
|
| 11562 |
+
},
|
| 11563 |
+
{
|
| 11564 |
+
"epoch": 0.5250359942409214,
|
| 11565 |
+
"grad_norm": 606.8063354492188,
|
| 11566 |
+
"learning_rate": 0.000189296380021589,
|
| 11567 |
+
"loss": 6.6527,
|
| 11568 |
+
"step": 1641
|
| 11569 |
+
},
|
| 11570 |
+
{
|
| 11571 |
+
"epoch": 0.5253559430491122,
|
| 11572 |
+
"grad_norm": 328.6357727050781,
|
| 11573 |
+
"learning_rate": 0.0001891018506105091,
|
| 11574 |
+
"loss": 6.9398,
|
| 11575 |
+
"step": 1642
|
| 11576 |
+
},
|
| 11577 |
+
{
|
| 11578 |
+
"epoch": 0.5256758918573028,
|
| 11579 |
+
"grad_norm": 14.112878799438477,
|
| 11580 |
+
"learning_rate": 0.00018890734337158172,
|
| 11581 |
+
"loss": 6.6962,
|
| 11582 |
+
"step": 1643
|
| 11583 |
+
},
|
| 11584 |
+
{
|
| 11585 |
+
"epoch": 0.5259958406654935,
|
| 11586 |
+
"grad_norm": 42.98262405395508,
|
| 11587 |
+
"learning_rate": 0.00018871285851117188,
|
| 11588 |
+
"loss": 6.6392,
|
| 11589 |
+
"step": 1644
|
| 11590 |
+
},
|
| 11591 |
+
{
|
| 11592 |
+
"epoch": 0.5263157894736842,
|
| 11593 |
+
"grad_norm": 71.3088150024414,
|
| 11594 |
+
"learning_rate": 0.00018851839623562047,
|
| 11595 |
+
"loss": 6.5748,
|
| 11596 |
+
"step": 1645
|
| 11597 |
+
},
|
| 11598 |
+
{
|
| 11599 |
+
"epoch": 0.5266357382818749,
|
| 11600 |
+
"grad_norm": 28.81409454345703,
|
| 11601 |
+
"learning_rate": 0.0001883239567512448,
|
| 11602 |
+
"loss": 6.6802,
|
| 11603 |
+
"step": 1646
|
| 11604 |
+
},
|
| 11605 |
+
{
|
| 11606 |
+
"epoch": 0.5269556870900656,
|
| 11607 |
+
"grad_norm": 8.297423362731934,
|
| 11608 |
+
"learning_rate": 0.000188129540264338,
|
| 11609 |
+
"loss": 6.5324,
|
| 11610 |
+
"step": 1647
|
| 11611 |
+
},
|
| 11612 |
+
{
|
| 11613 |
+
"epoch": 0.5272756358982563,
|
| 11614 |
+
"grad_norm": 16.27468490600586,
|
| 11615 |
+
"learning_rate": 0.0001879351469811684,
|
| 11616 |
+
"loss": 6.5321,
|
| 11617 |
+
"step": 1648
|
| 11618 |
+
},
|
| 11619 |
+
{
|
| 11620 |
+
"epoch": 0.527595584706447,
|
| 11621 |
+
"grad_norm": 9.677922248840332,
|
| 11622 |
+
"learning_rate": 0.00018774077710798014,
|
| 11623 |
+
"loss": 6.5972,
|
| 11624 |
+
"step": 1649
|
| 11625 |
+
},
|
| 11626 |
+
{
|
| 11627 |
+
"epoch": 0.5279155335146376,
|
| 11628 |
+
"grad_norm": 36.90768814086914,
|
| 11629 |
+
"learning_rate": 0.00018754643085099247,
|
| 11630 |
+
"loss": 6.8554,
|
| 11631 |
+
"step": 1650
|
| 11632 |
+
},
|
| 11633 |
+
{
|
| 11634 |
+
"epoch": 0.5282354823228284,
|
| 11635 |
+
"grad_norm": 38.28261947631836,
|
| 11636 |
+
"learning_rate": 0.00018735210841639918,
|
| 11637 |
+
"loss": 6.5748,
|
| 11638 |
+
"step": 1651
|
| 11639 |
+
},
|
| 11640 |
+
{
|
| 11641 |
+
"epoch": 0.528555431131019,
|
| 11642 |
+
"grad_norm": 8.838459968566895,
|
| 11643 |
+
"learning_rate": 0.00018715781001036938,
|
| 11644 |
+
"loss": 6.6891,
|
| 11645 |
+
"step": 1652
|
| 11646 |
+
},
|
| 11647 |
+
{
|
| 11648 |
+
"epoch": 0.5288753799392097,
|
| 11649 |
+
"grad_norm": 9.053256034851074,
|
| 11650 |
+
"learning_rate": 0.0001869635358390462,
|
| 11651 |
+
"loss": 6.6757,
|
| 11652 |
+
"step": 1653
|
| 11653 |
+
},
|
| 11654 |
+
{
|
| 11655 |
+
"epoch": 0.5291953287474004,
|
| 11656 |
+
"grad_norm": 13.860633850097656,
|
| 11657 |
+
"learning_rate": 0.00018676928610854727,
|
| 11658 |
+
"loss": 6.7456,
|
| 11659 |
+
"step": 1654
|
| 11660 |
+
},
|
| 11661 |
+
{
|
| 11662 |
+
"epoch": 0.5295152775555911,
|
| 11663 |
+
"grad_norm": 6.551642417907715,
|
| 11664 |
+
"learning_rate": 0.00018657506102496443,
|
| 11665 |
+
"loss": 6.4432,
|
| 11666 |
+
"step": 1655
|
| 11667 |
+
},
|
| 11668 |
+
{
|
| 11669 |
+
"epoch": 0.5298352263637818,
|
| 11670 |
+
"grad_norm": 6.068846702575684,
|
| 11671 |
+
"learning_rate": 0.000186380860794363,
|
| 11672 |
+
"loss": 6.5114,
|
| 11673 |
+
"step": 1656
|
| 11674 |
+
},
|
| 11675 |
+
{
|
| 11676 |
+
"epoch": 0.5301551751719725,
|
| 11677 |
+
"grad_norm": 298.4500732421875,
|
| 11678 |
+
"learning_rate": 0.00018618668562278218,
|
| 11679 |
+
"loss": 6.5394,
|
| 11680 |
+
"step": 1657
|
| 11681 |
+
},
|
| 11682 |
+
{
|
| 11683 |
+
"epoch": 0.5304751239801632,
|
| 11684 |
+
"grad_norm": 6.769711971282959,
|
| 11685 |
+
"learning_rate": 0.00018599253571623472,
|
| 11686 |
+
"loss": 6.6384,
|
| 11687 |
+
"step": 1658
|
| 11688 |
+
},
|
| 11689 |
+
{
|
| 11690 |
+
"epoch": 0.5307950727883539,
|
| 11691 |
+
"grad_norm": 22.864620208740234,
|
| 11692 |
+
"learning_rate": 0.0001857984112807061,
|
| 11693 |
+
"loss": 6.7885,
|
| 11694 |
+
"step": 1659
|
| 11695 |
+
},
|
| 11696 |
+
{
|
| 11697 |
+
"epoch": 0.5311150215965446,
|
| 11698 |
+
"grad_norm": 5.3118720054626465,
|
| 11699 |
+
"learning_rate": 0.00018560431252215528,
|
| 11700 |
+
"loss": 6.6045,
|
| 11701 |
+
"step": 1660
|
| 11702 |
+
},
|
| 11703 |
+
{
|
| 11704 |
+
"epoch": 0.5314349704047352,
|
| 11705 |
+
"grad_norm": 60.282066345214844,
|
| 11706 |
+
"learning_rate": 0.00018541023964651382,
|
| 11707 |
+
"loss": 6.6525,
|
| 11708 |
+
"step": 1661
|
| 11709 |
+
},
|
| 11710 |
+
{
|
| 11711 |
+
"epoch": 0.531754919212926,
|
| 11712 |
+
"grad_norm": 6.3130035400390625,
|
| 11713 |
+
"learning_rate": 0.00018521619285968545,
|
| 11714 |
+
"loss": 6.8046,
|
| 11715 |
+
"step": 1662
|
| 11716 |
+
},
|
| 11717 |
+
{
|
| 11718 |
+
"epoch": 0.5320748680211166,
|
| 11719 |
+
"grad_norm": 7.7585954666137695,
|
| 11720 |
+
"learning_rate": 0.00018502217236754682,
|
| 11721 |
+
"loss": 6.9111,
|
| 11722 |
+
"step": 1663
|
| 11723 |
+
},
|
| 11724 |
+
{
|
| 11725 |
+
"epoch": 0.5323948168293073,
|
| 11726 |
+
"grad_norm": 14.32033920288086,
|
| 11727 |
+
"learning_rate": 0.00018482817837594634,
|
| 11728 |
+
"loss": 6.5773,
|
| 11729 |
+
"step": 1664
|
| 11730 |
+
},
|
| 11731 |
+
{
|
| 11732 |
+
"epoch": 0.532714765637498,
|
| 11733 |
+
"grad_norm": 54.441688537597656,
|
| 11734 |
+
"learning_rate": 0.00018463421109070422,
|
| 11735 |
+
"loss": 6.4621,
|
| 11736 |
+
"step": 1665
|
| 11737 |
+
},
|
| 11738 |
+
{
|
| 11739 |
+
"epoch": 0.5330347144456887,
|
| 11740 |
+
"grad_norm": 8.818940162658691,
|
| 11741 |
+
"learning_rate": 0.00018444027071761268,
|
| 11742 |
+
"loss": 6.6185,
|
| 11743 |
+
"step": 1666
|
| 11744 |
+
},
|
| 11745 |
+
{
|
| 11746 |
+
"epoch": 0.5333546632538794,
|
| 11747 |
+
"grad_norm": 8.330538749694824,
|
| 11748 |
+
"learning_rate": 0.000184246357462435,
|
| 11749 |
+
"loss": 6.7875,
|
| 11750 |
+
"step": 1667
|
| 11751 |
+
},
|
| 11752 |
+
{
|
| 11753 |
+
"epoch": 0.5336746120620701,
|
| 11754 |
+
"grad_norm": 8.418845176696777,
|
| 11755 |
+
"learning_rate": 0.00018405247153090588,
|
| 11756 |
+
"loss": 6.6936,
|
| 11757 |
+
"step": 1668
|
| 11758 |
+
},
|
| 11759 |
+
{
|
| 11760 |
+
"epoch": 0.5339945608702608,
|
| 11761 |
+
"grad_norm": 4.972550392150879,
|
| 11762 |
+
"learning_rate": 0.0001838586131287312,
|
| 11763 |
+
"loss": 6.6284,
|
| 11764 |
+
"step": 1669
|
| 11765 |
+
},
|
| 11766 |
+
{
|
| 11767 |
+
"epoch": 0.5343145096784514,
|
| 11768 |
+
"grad_norm": 4.819149971008301,
|
| 11769 |
+
"learning_rate": 0.00018366478246158722,
|
| 11770 |
+
"loss": 6.6123,
|
| 11771 |
+
"step": 1670
|
| 11772 |
+
},
|
| 11773 |
+
{
|
| 11774 |
+
"epoch": 0.5346344584866422,
|
| 11775 |
+
"grad_norm": 5.511754512786865,
|
| 11776 |
+
"learning_rate": 0.0001834709797351211,
|
| 11777 |
+
"loss": 6.5928,
|
| 11778 |
+
"step": 1671
|
| 11779 |
+
},
|
| 11780 |
+
{
|
| 11781 |
+
"epoch": 0.5349544072948328,
|
| 11782 |
+
"grad_norm": 4.756268501281738,
|
| 11783 |
+
"learning_rate": 0.00018327720515495043,
|
| 11784 |
+
"loss": 6.719,
|
| 11785 |
+
"step": 1672
|
| 11786 |
+
},
|
| 11787 |
+
{
|
| 11788 |
+
"epoch": 0.5352743561030235,
|
| 11789 |
+
"grad_norm": 16.621448516845703,
|
| 11790 |
+
"learning_rate": 0.00018308345892666247,
|
| 11791 |
+
"loss": 6.6068,
|
| 11792 |
+
"step": 1673
|
| 11793 |
+
},
|
| 11794 |
+
{
|
| 11795 |
+
"epoch": 0.5355943049112142,
|
| 11796 |
+
"grad_norm": 7.359424591064453,
|
| 11797 |
+
"learning_rate": 0.0001828897412558149,
|
| 11798 |
+
"loss": 6.4707,
|
| 11799 |
+
"step": 1674
|
| 11800 |
+
},
|
| 11801 |
+
{
|
| 11802 |
+
"epoch": 0.5359142537194049,
|
| 11803 |
+
"grad_norm": 5.423340320587158,
|
| 11804 |
+
"learning_rate": 0.00018269605234793492,
|
| 11805 |
+
"loss": 6.5131,
|
| 11806 |
+
"step": 1675
|
| 11807 |
+
},
|
| 11808 |
+
{
|
| 11809 |
+
"epoch": 0.5362342025275956,
|
| 11810 |
+
"grad_norm": 6.9871320724487305,
|
| 11811 |
+
"learning_rate": 0.00018250239240851898,
|
| 11812 |
+
"loss": 6.4692,
|
| 11813 |
+
"step": 1676
|
| 11814 |
+
},
|
| 11815 |
+
{
|
| 11816 |
+
"epoch": 0.5365541513357863,
|
| 11817 |
+
"grad_norm": 204.94081115722656,
|
| 11818 |
+
"learning_rate": 0.00018230876164303334,
|
| 11819 |
+
"loss": 6.6845,
|
| 11820 |
+
"step": 1677
|
| 11821 |
+
},
|
| 11822 |
+
{
|
| 11823 |
+
"epoch": 0.536874100143977,
|
| 11824 |
+
"grad_norm": 6.5210700035095215,
|
| 11825 |
+
"learning_rate": 0.00018211516025691267,
|
| 11826 |
+
"loss": 6.634,
|
| 11827 |
+
"step": 1678
|
| 11828 |
+
},
|
| 11829 |
+
{
|
| 11830 |
+
"epoch": 0.5371940489521676,
|
| 11831 |
+
"grad_norm": 6.336790084838867,
|
| 11832 |
+
"learning_rate": 0.00018192158845556087,
|
| 11833 |
+
"loss": 6.4995,
|
| 11834 |
+
"step": 1679
|
| 11835 |
+
},
|
| 11836 |
+
{
|
| 11837 |
+
"epoch": 0.5375139977603584,
|
| 11838 |
+
"grad_norm": 5.2590718269348145,
|
| 11839 |
+
"learning_rate": 0.00018172804644435054,
|
| 11840 |
+
"loss": 6.472,
|
| 11841 |
+
"step": 1680
|
| 11842 |
+
},
|
| 11843 |
+
{
|
| 11844 |
+
"epoch": 0.537833946568549,
|
| 11845 |
+
"grad_norm": 7.329125881195068,
|
| 11846 |
+
"learning_rate": 0.00018153453442862222,
|
| 11847 |
+
"loss": 6.7036,
|
| 11848 |
+
"step": 1681
|
| 11849 |
+
},
|
| 11850 |
+
{
|
| 11851 |
+
"epoch": 0.5381538953767397,
|
| 11852 |
+
"grad_norm": 9.183751106262207,
|
| 11853 |
+
"learning_rate": 0.00018134105261368499,
|
| 11854 |
+
"loss": 6.676,
|
| 11855 |
+
"step": 1682
|
| 11856 |
+
},
|
| 11857 |
+
{
|
| 11858 |
+
"epoch": 0.5384738441849304,
|
| 11859 |
+
"grad_norm": 10.038751602172852,
|
| 11860 |
+
"learning_rate": 0.00018114760120481598,
|
| 11861 |
+
"loss": 6.6576,
|
| 11862 |
+
"step": 1683
|
| 11863 |
+
},
|
| 11864 |
+
{
|
| 11865 |
+
"epoch": 0.5387937929931211,
|
| 11866 |
+
"grad_norm": 8.166698455810547,
|
| 11867 |
+
"learning_rate": 0.00018095418040725965,
|
| 11868 |
+
"loss": 6.5329,
|
| 11869 |
+
"step": 1684
|
| 11870 |
+
},
|
| 11871 |
+
{
|
| 11872 |
+
"epoch": 0.5391137418013118,
|
| 11873 |
+
"grad_norm": 8.943130493164062,
|
| 11874 |
+
"learning_rate": 0.0001807607904262282,
|
| 11875 |
+
"loss": 6.8779,
|
| 11876 |
+
"step": 1685
|
| 11877 |
+
},
|
| 11878 |
+
{
|
| 11879 |
+
"epoch": 0.5394336906095025,
|
| 11880 |
+
"grad_norm": 11.258163452148438,
|
| 11881 |
+
"learning_rate": 0.00018056743146690143,
|
| 11882 |
+
"loss": 6.6053,
|
| 11883 |
+
"step": 1686
|
| 11884 |
+
},
|
| 11885 |
+
{
|
| 11886 |
+
"epoch": 0.5397536394176932,
|
| 11887 |
+
"grad_norm": 7.90877628326416,
|
| 11888 |
+
"learning_rate": 0.00018037410373442558,
|
| 11889 |
+
"loss": 6.5302,
|
| 11890 |
+
"step": 1687
|
| 11891 |
+
},
|
| 11892 |
+
{
|
| 11893 |
+
"epoch": 0.5400735882258838,
|
| 11894 |
+
"grad_norm": 10.706826210021973,
|
| 11895 |
+
"learning_rate": 0.0001801808074339144,
|
| 11896 |
+
"loss": 6.5518,
|
| 11897 |
+
"step": 1688
|
| 11898 |
+
},
|
| 11899 |
+
{
|
| 11900 |
+
"epoch": 0.5403935370340746,
|
| 11901 |
+
"grad_norm": 5.881041526794434,
|
| 11902 |
+
"learning_rate": 0.00017998754277044786,
|
| 11903 |
+
"loss": 6.8653,
|
| 11904 |
+
"step": 1689
|
| 11905 |
+
},
|
| 11906 |
+
{
|
| 11907 |
+
"epoch": 0.5407134858422652,
|
| 11908 |
+
"grad_norm": 7.093472003936768,
|
| 11909 |
+
"learning_rate": 0.00017979430994907253,
|
| 11910 |
+
"loss": 6.8847,
|
| 11911 |
+
"step": 1690
|
| 11912 |
+
},
|
| 11913 |
+
{
|
| 11914 |
+
"epoch": 0.541033434650456,
|
| 11915 |
+
"grad_norm": 7.041862964630127,
|
| 11916 |
+
"learning_rate": 0.0001796011091748013,
|
| 11917 |
+
"loss": 6.6155,
|
| 11918 |
+
"step": 1691
|
| 11919 |
+
},
|
| 11920 |
+
{
|
| 11921 |
+
"epoch": 0.5413533834586466,
|
| 11922 |
+
"grad_norm": 6.642461776733398,
|
| 11923 |
+
"learning_rate": 0.0001794079406526128,
|
| 11924 |
+
"loss": 6.5458,
|
| 11925 |
+
"step": 1692
|
| 11926 |
+
},
|
| 11927 |
+
{
|
| 11928 |
+
"epoch": 0.5416733322668373,
|
| 11929 |
+
"grad_norm": 6.088051795959473,
|
| 11930 |
+
"learning_rate": 0.0001792148045874516,
|
| 11931 |
+
"loss": 6.7226,
|
| 11932 |
+
"step": 1693
|
| 11933 |
+
},
|
| 11934 |
+
{
|
| 11935 |
+
"epoch": 0.541993281075028,
|
| 11936 |
+
"grad_norm": 8.008899688720703,
|
| 11937 |
+
"learning_rate": 0.000179021701184228,
|
| 11938 |
+
"loss": 6.7852,
|
| 11939 |
+
"step": 1694
|
| 11940 |
+
},
|
| 11941 |
+
{
|
| 11942 |
+
"epoch": 0.5423132298832187,
|
| 11943 |
+
"grad_norm": 5.187869071960449,
|
| 11944 |
+
"learning_rate": 0.00017882863064781721,
|
| 11945 |
+
"loss": 6.6511,
|
| 11946 |
+
"step": 1695
|
| 11947 |
+
},
|
| 11948 |
+
{
|
| 11949 |
+
"epoch": 0.5426331786914094,
|
| 11950 |
+
"grad_norm": 47.277042388916016,
|
| 11951 |
+
"learning_rate": 0.00017863559318305992,
|
| 11952 |
+
"loss": 6.5314,
|
| 11953 |
+
"step": 1696
|
| 11954 |
+
},
|
| 11955 |
+
{
|
| 11956 |
+
"epoch": 0.5429531274996001,
|
| 11957 |
+
"grad_norm": 8.942819595336914,
|
| 11958 |
+
"learning_rate": 0.00017844258899476183,
|
| 11959 |
+
"loss": 6.6763,
|
| 11960 |
+
"step": 1697
|
| 11961 |
+
},
|
| 11962 |
+
{
|
| 11963 |
+
"epoch": 0.5432730763077908,
|
| 11964 |
+
"grad_norm": 9.141765594482422,
|
| 11965 |
+
"learning_rate": 0.00017824961828769286,
|
| 11966 |
+
"loss": 6.5762,
|
| 11967 |
+
"step": 1698
|
| 11968 |
+
},
|
| 11969 |
+
{
|
| 11970 |
+
"epoch": 0.5435930251159814,
|
| 11971 |
+
"grad_norm": 7.712893962860107,
|
| 11972 |
+
"learning_rate": 0.00017805668126658785,
|
| 11973 |
+
"loss": 6.6161,
|
| 11974 |
+
"step": 1699
|
| 11975 |
+
},
|
| 11976 |
+
{
|
| 11977 |
+
"epoch": 0.5439129739241721,
|
| 11978 |
+
"grad_norm": 6.733684539794922,
|
| 11979 |
+
"learning_rate": 0.0001778637781361457,
|
| 11980 |
+
"loss": 6.7396,
|
| 11981 |
+
"step": 1700
|
| 11982 |
+
},
|
| 11983 |
+
{
|
| 11984 |
+
"epoch": 0.5442329227323628,
|
| 11985 |
+
"grad_norm": 7.395744800567627,
|
| 11986 |
+
"learning_rate": 0.0001776709091010293,
|
| 11987 |
+
"loss": 6.694,
|
| 11988 |
+
"step": 1701
|
| 11989 |
+
},
|
| 11990 |
+
{
|
| 11991 |
+
"epoch": 0.5445528715405535,
|
| 11992 |
+
"grad_norm": 6.6596150398254395,
|
| 11993 |
+
"learning_rate": 0.00017747807436586574,
|
| 11994 |
+
"loss": 6.505,
|
| 11995 |
+
"step": 1702
|
| 11996 |
+
},
|
| 11997 |
+
{
|
| 11998 |
+
"epoch": 0.5448728203487442,
|
| 11999 |
+
"grad_norm": 7.438435077667236,
|
| 12000 |
+
"learning_rate": 0.00017728527413524516,
|
| 12001 |
+
"loss": 6.834,
|
| 12002 |
+
"step": 1703
|
| 12003 |
+
},
|
| 12004 |
+
{
|
| 12005 |
+
"epoch": 0.5451927691569349,
|
| 12006 |
+
"grad_norm": 5.447627544403076,
|
| 12007 |
+
"learning_rate": 0.0001770925086137214,
|
| 12008 |
+
"loss": 6.5036,
|
| 12009 |
+
"step": 1704
|
| 12010 |
+
},
|
| 12011 |
+
{
|
| 12012 |
+
"epoch": 0.5455127179651256,
|
| 12013 |
+
"grad_norm": 8.212823867797852,
|
| 12014 |
+
"learning_rate": 0.00017689977800581166,
|
| 12015 |
+
"loss": 6.5378,
|
| 12016 |
+
"step": 1705
|
| 12017 |
+
},
|
| 12018 |
+
{
|
| 12019 |
+
"epoch": 0.5458326667733163,
|
| 12020 |
+
"grad_norm": 10.185394287109375,
|
| 12021 |
+
"learning_rate": 0.00017670708251599568,
|
| 12022 |
+
"loss": 6.647,
|
| 12023 |
+
"step": 1706
|
| 12024 |
+
},
|
| 12025 |
+
{
|
| 12026 |
+
"epoch": 0.546152615581507,
|
| 12027 |
+
"grad_norm": 7.351102352142334,
|
| 12028 |
+
"learning_rate": 0.00017651442234871612,
|
| 12029 |
+
"loss": 6.6004,
|
| 12030 |
+
"step": 1707
|
| 12031 |
+
},
|
| 12032 |
+
{
|
| 12033 |
+
"epoch": 0.5464725643896976,
|
| 12034 |
+
"grad_norm": 8.159398078918457,
|
| 12035 |
+
"learning_rate": 0.00017632179770837845,
|
| 12036 |
+
"loss": 6.5938,
|
| 12037 |
+
"step": 1708
|
| 12038 |
+
},
|
| 12039 |
+
{
|
| 12040 |
+
"epoch": 0.5467925131978884,
|
| 12041 |
+
"grad_norm": 5.244987487792969,
|
| 12042 |
+
"learning_rate": 0.0001761292087993499,
|
| 12043 |
+
"loss": 6.8191,
|
| 12044 |
+
"step": 1709
|
| 12045 |
+
},
|
| 12046 |
+
{
|
| 12047 |
+
"epoch": 0.547112462006079,
|
| 12048 |
+
"grad_norm": 5.631025314331055,
|
| 12049 |
+
"learning_rate": 0.00017593665582596026,
|
| 12050 |
+
"loss": 6.5519,
|
| 12051 |
+
"step": 1710
|
| 12052 |
+
},
|
| 12053 |
+
{
|
| 12054 |
+
"epoch": 0.5474324108142697,
|
| 12055 |
+
"grad_norm": 5.977352142333984,
|
| 12056 |
+
"learning_rate": 0.00017574413899250092,
|
| 12057 |
+
"loss": 6.5664,
|
| 12058 |
+
"step": 1711
|
| 12059 |
+
},
|
| 12060 |
+
{
|
| 12061 |
+
"epoch": 0.5477523596224604,
|
| 12062 |
+
"grad_norm": 6.352726459503174,
|
| 12063 |
+
"learning_rate": 0.00017555165850322498,
|
| 12064 |
+
"loss": 6.6962,
|
| 12065 |
+
"step": 1712
|
| 12066 |
+
},
|
| 12067 |
+
{
|
| 12068 |
+
"epoch": 0.5480723084306511,
|
| 12069 |
+
"grad_norm": 8.479165077209473,
|
| 12070 |
+
"learning_rate": 0.00017535921456234715,
|
| 12071 |
+
"loss": 6.3559,
|
| 12072 |
+
"step": 1713
|
| 12073 |
+
},
|
| 12074 |
+
{
|
| 12075 |
+
"epoch": 0.5483922572388418,
|
| 12076 |
+
"grad_norm": 6.653327465057373,
|
| 12077 |
+
"learning_rate": 0.00017516680737404317,
|
| 12078 |
+
"loss": 6.8439,
|
| 12079 |
+
"step": 1714
|
| 12080 |
+
},
|
| 12081 |
+
{
|
| 12082 |
+
"epoch": 0.5487122060470325,
|
| 12083 |
+
"grad_norm": 5.953420162200928,
|
| 12084 |
+
"learning_rate": 0.00017497443714244976,
|
| 12085 |
+
"loss": 6.5994,
|
| 12086 |
+
"step": 1715
|
| 12087 |
+
},
|
| 12088 |
+
{
|
| 12089 |
+
"epoch": 0.5490321548552232,
|
| 12090 |
+
"grad_norm": 5.354004383087158,
|
| 12091 |
+
"learning_rate": 0.00017478210407166473,
|
| 12092 |
+
"loss": 6.797,
|
| 12093 |
+
"step": 1716
|
| 12094 |
+
},
|
| 12095 |
+
{
|
| 12096 |
+
"epoch": 0.5493521036634138,
|
| 12097 |
+
"grad_norm": 8.759894371032715,
|
| 12098 |
+
"learning_rate": 0.00017458980836574604,
|
| 12099 |
+
"loss": 6.7733,
|
| 12100 |
+
"step": 1717
|
| 12101 |
+
},
|
| 12102 |
+
{
|
| 12103 |
+
"epoch": 0.5496720524716046,
|
| 12104 |
+
"grad_norm": 6.662622451782227,
|
| 12105 |
+
"learning_rate": 0.00017439755022871227,
|
| 12106 |
+
"loss": 6.6149,
|
| 12107 |
+
"step": 1718
|
| 12108 |
+
},
|
| 12109 |
+
{
|
| 12110 |
+
"epoch": 0.5499920012797952,
|
| 12111 |
+
"grad_norm": 5.91054105758667,
|
| 12112 |
+
"learning_rate": 0.00017420532986454226,
|
| 12113 |
+
"loss": 6.402,
|
| 12114 |
+
"step": 1719
|
| 12115 |
+
},
|
| 12116 |
+
{
|
| 12117 |
+
"epoch": 0.550311950087986,
|
| 12118 |
+
"grad_norm": 6.375626087188721,
|
| 12119 |
+
"learning_rate": 0.0001740131474771744,
|
| 12120 |
+
"loss": 6.5757,
|
| 12121 |
+
"step": 1720
|
| 12122 |
+
},
|
| 12123 |
+
{
|
| 12124 |
+
"epoch": 0.5506318988961766,
|
| 12125 |
+
"grad_norm": 7.947948932647705,
|
| 12126 |
+
"learning_rate": 0.00017382100327050705,
|
| 12127 |
+
"loss": 6.7252,
|
| 12128 |
+
"step": 1721
|
| 12129 |
+
},
|
| 12130 |
+
{
|
| 12131 |
+
"epoch": 0.5509518477043673,
|
| 12132 |
+
"grad_norm": 670.0234375,
|
| 12133 |
+
"learning_rate": 0.00017362889744839804,
|
| 12134 |
+
"loss": 6.6807,
|
| 12135 |
+
"step": 1722
|
| 12136 |
+
},
|
| 12137 |
+
{
|
| 12138 |
+
"epoch": 0.551271796512558,
|
| 12139 |
+
"grad_norm": 5.699699401855469,
|
| 12140 |
+
"learning_rate": 0.0001734368302146644,
|
| 12141 |
+
"loss": 6.6222,
|
| 12142 |
+
"step": 1723
|
| 12143 |
+
},
|
| 12144 |
+
{
|
| 12145 |
+
"epoch": 0.5515917453207487,
|
| 12146 |
+
"grad_norm": 5.992722034454346,
|
| 12147 |
+
"learning_rate": 0.0001732448017730823,
|
| 12148 |
+
"loss": 6.4582,
|
| 12149 |
+
"step": 1724
|
| 12150 |
+
},
|
| 12151 |
+
{
|
| 12152 |
+
"epoch": 0.5519116941289394,
|
| 12153 |
+
"grad_norm": 4.542269229888916,
|
| 12154 |
+
"learning_rate": 0.00017305281232738668,
|
| 12155 |
+
"loss": 6.699,
|
| 12156 |
+
"step": 1725
|
| 12157 |
+
},
|
| 12158 |
+
{
|
| 12159 |
+
"epoch": 0.55223164293713,
|
| 12160 |
+
"grad_norm": 6.930887222290039,
|
| 12161 |
+
"learning_rate": 0.00017286086208127114,
|
| 12162 |
+
"loss": 6.7525,
|
| 12163 |
+
"step": 1726
|
| 12164 |
+
},
|
| 12165 |
+
{
|
| 12166 |
+
"epoch": 0.5525515917453208,
|
| 12167 |
+
"grad_norm": 7.069804668426514,
|
| 12168 |
+
"learning_rate": 0.00017266895123838776,
|
| 12169 |
+
"loss": 6.6047,
|
| 12170 |
+
"step": 1727
|
| 12171 |
+
},
|
| 12172 |
+
{
|
| 12173 |
+
"epoch": 0.5528715405535114,
|
| 12174 |
+
"grad_norm": 5.9045915603637695,
|
| 12175 |
+
"learning_rate": 0.00017247708000234663,
|
| 12176 |
+
"loss": 6.5806,
|
| 12177 |
+
"step": 1728
|
| 12178 |
+
},
|
| 12179 |
+
{
|
| 12180 |
+
"epoch": 0.5531914893617021,
|
| 12181 |
+
"grad_norm": 7.1390299797058105,
|
| 12182 |
+
"learning_rate": 0.00017228524857671595,
|
| 12183 |
+
"loss": 6.7053,
|
| 12184 |
+
"step": 1729
|
| 12185 |
+
},
|
| 12186 |
+
{
|
| 12187 |
+
"epoch": 0.5535114381698928,
|
| 12188 |
+
"grad_norm": 6.806826591491699,
|
| 12189 |
+
"learning_rate": 0.00017209345716502186,
|
| 12190 |
+
"loss": 6.6264,
|
| 12191 |
+
"step": 1730
|
| 12192 |
+
},
|
| 12193 |
+
{
|
| 12194 |
+
"epoch": 0.5538313869780835,
|
| 12195 |
+
"grad_norm": 6.230347633361816,
|
| 12196 |
+
"learning_rate": 0.0001719017059707476,
|
| 12197 |
+
"loss": 6.7543,
|
| 12198 |
+
"step": 1731
|
| 12199 |
+
},
|
| 12200 |
+
{
|
| 12201 |
+
"epoch": 0.5541513357862742,
|
| 12202 |
+
"grad_norm": 6.432546138763428,
|
| 12203 |
+
"learning_rate": 0.00017170999519733416,
|
| 12204 |
+
"loss": 6.5587,
|
| 12205 |
+
"step": 1732
|
| 12206 |
+
},
|
| 12207 |
+
{
|
| 12208 |
+
"epoch": 0.5544712845944649,
|
| 12209 |
+
"grad_norm": 6.226624488830566,
|
| 12210 |
+
"learning_rate": 0.00017151832504817932,
|
| 12211 |
+
"loss": 6.444,
|
| 12212 |
+
"step": 1733
|
| 12213 |
+
},
|
| 12214 |
+
{
|
| 12215 |
+
"epoch": 0.5547912334026556,
|
| 12216 |
+
"grad_norm": 8.379231452941895,
|
| 12217 |
+
"learning_rate": 0.00017132669572663808,
|
| 12218 |
+
"loss": 6.3285,
|
| 12219 |
+
"step": 1734
|
| 12220 |
+
},
|
| 12221 |
+
{
|
| 12222 |
+
"epoch": 0.5551111822108462,
|
| 12223 |
+
"grad_norm": 6.226011276245117,
|
| 12224 |
+
"learning_rate": 0.00017113510743602188,
|
| 12225 |
+
"loss": 6.7741,
|
| 12226 |
+
"step": 1735
|
| 12227 |
+
},
|
| 12228 |
+
{
|
| 12229 |
+
"epoch": 0.555431131019037,
|
| 12230 |
+
"grad_norm": 9.727779388427734,
|
| 12231 |
+
"learning_rate": 0.00017094356037959871,
|
| 12232 |
+
"loss": 6.5921,
|
| 12233 |
+
"step": 1736
|
| 12234 |
+
},
|
| 12235 |
+
{
|
| 12236 |
+
"epoch": 0.5557510798272276,
|
| 12237 |
+
"grad_norm": 5.827595233917236,
|
| 12238 |
+
"learning_rate": 0.0001707520547605928,
|
| 12239 |
+
"loss": 6.3784,
|
| 12240 |
+
"step": 1737
|
| 12241 |
+
},
|
| 12242 |
+
{
|
| 12243 |
+
"epoch": 0.5560710286354184,
|
| 12244 |
+
"grad_norm": 5.6185994148254395,
|
| 12245 |
+
"learning_rate": 0.0001705605907821845,
|
| 12246 |
+
"loss": 6.5343,
|
| 12247 |
+
"step": 1738
|
| 12248 |
+
},
|
| 12249 |
+
{
|
| 12250 |
+
"epoch": 0.556390977443609,
|
| 12251 |
+
"grad_norm": 33.767696380615234,
|
| 12252 |
+
"learning_rate": 0.00017036916864750985,
|
| 12253 |
+
"loss": 6.639,
|
| 12254 |
+
"step": 1739
|
| 12255 |
+
},
|
| 12256 |
+
{
|
| 12257 |
+
"epoch": 0.5567109262517997,
|
| 12258 |
+
"grad_norm": 4.321077823638916,
|
| 12259 |
+
"learning_rate": 0.00017017778855966053,
|
| 12260 |
+
"loss": 6.4703,
|
| 12261 |
+
"step": 1740
|
| 12262 |
+
},
|
| 12263 |
+
{
|
| 12264 |
+
"epoch": 0.5570308750599904,
|
| 12265 |
+
"grad_norm": 7.326907634735107,
|
| 12266 |
+
"learning_rate": 0.00016998645072168373,
|
| 12267 |
+
"loss": 6.576,
|
| 12268 |
+
"step": 1741
|
| 12269 |
+
},
|
| 12270 |
+
{
|
| 12271 |
+
"epoch": 0.5573508238681811,
|
| 12272 |
+
"grad_norm": 7.584223747253418,
|
| 12273 |
+
"learning_rate": 0.0001697951553365816,
|
| 12274 |
+
"loss": 6.6078,
|
| 12275 |
+
"step": 1742
|
| 12276 |
+
},
|
| 12277 |
+
{
|
| 12278 |
+
"epoch": 0.5576707726763718,
|
| 12279 |
+
"grad_norm": 7.753310680389404,
|
| 12280 |
+
"learning_rate": 0.0001696039026073115,
|
| 12281 |
+
"loss": 6.4804,
|
| 12282 |
+
"step": 1743
|
| 12283 |
+
},
|
| 12284 |
+
{
|
| 12285 |
+
"epoch": 0.5579907214845625,
|
| 12286 |
+
"grad_norm": 6.727494716644287,
|
| 12287 |
+
"learning_rate": 0.00016941269273678525,
|
| 12288 |
+
"loss": 6.4974,
|
| 12289 |
+
"step": 1744
|
| 12290 |
+
},
|
| 12291 |
+
{
|
| 12292 |
+
"epoch": 0.5583106702927532,
|
| 12293 |
+
"grad_norm": 7.953418254852295,
|
| 12294 |
+
"learning_rate": 0.00016922152592786945,
|
| 12295 |
+
"loss": 6.7166,
|
| 12296 |
+
"step": 1745
|
| 12297 |
+
},
|
| 12298 |
+
{
|
| 12299 |
+
"epoch": 0.5586306191009438,
|
| 12300 |
+
"grad_norm": 6.279244422912598,
|
| 12301 |
+
"learning_rate": 0.00016903040238338489,
|
| 12302 |
+
"loss": 6.6045,
|
| 12303 |
+
"step": 1746
|
| 12304 |
+
},
|
| 12305 |
+
{
|
| 12306 |
+
"epoch": 0.5589505679091346,
|
| 12307 |
+
"grad_norm": 4.617511749267578,
|
| 12308 |
+
"learning_rate": 0.00016883932230610647,
|
| 12309 |
+
"loss": 6.5688,
|
| 12310 |
+
"step": 1747
|
| 12311 |
+
},
|
| 12312 |
+
{
|
| 12313 |
+
"epoch": 0.5592705167173252,
|
| 12314 |
+
"grad_norm": 5.750264644622803,
|
| 12315 |
+
"learning_rate": 0.00016864828589876297,
|
| 12316 |
+
"loss": 6.5031,
|
| 12317 |
+
"step": 1748
|
| 12318 |
+
},
|
| 12319 |
+
{
|
| 12320 |
+
"epoch": 0.559590465525516,
|
| 12321 |
+
"grad_norm": 7.819819450378418,
|
| 12322 |
+
"learning_rate": 0.00016845729336403692,
|
| 12323 |
+
"loss": 6.4559,
|
| 12324 |
+
"step": 1749
|
| 12325 |
+
},
|
| 12326 |
+
{
|
| 12327 |
+
"epoch": 0.5599104143337066,
|
| 12328 |
+
"grad_norm": 7.785708904266357,
|
| 12329 |
+
"learning_rate": 0.00016826634490456414,
|
| 12330 |
+
"loss": 6.5744,
|
| 12331 |
+
"step": 1750
|
| 12332 |
+
},
|
| 12333 |
+
{
|
| 12334 |
+
"epoch": 0.5602303631418973,
|
| 12335 |
+
"grad_norm": 7.5504560470581055,
|
| 12336 |
+
"learning_rate": 0.00016807544072293388,
|
| 12337 |
+
"loss": 6.6894,
|
| 12338 |
+
"step": 1751
|
| 12339 |
+
},
|
| 12340 |
+
{
|
| 12341 |
+
"epoch": 0.560550311950088,
|
| 12342 |
+
"grad_norm": 33.57929992675781,
|
| 12343 |
+
"learning_rate": 0.00016788458102168823,
|
| 12344 |
+
"loss": 6.4001,
|
| 12345 |
+
"step": 1752
|
| 12346 |
+
},
|
| 12347 |
+
{
|
| 12348 |
+
"epoch": 0.5608702607582787,
|
| 12349 |
+
"grad_norm": 12.012317657470703,
|
| 12350 |
+
"learning_rate": 0.00016769376600332222,
|
| 12351 |
+
"loss": 6.4613,
|
| 12352 |
+
"step": 1753
|
| 12353 |
+
},
|
| 12354 |
+
{
|
| 12355 |
+
"epoch": 0.5611902095664694,
|
| 12356 |
+
"grad_norm": 5.870274543762207,
|
| 12357 |
+
"learning_rate": 0.00016750299587028344,
|
| 12358 |
+
"loss": 6.5538,
|
| 12359 |
+
"step": 1754
|
| 12360 |
+
},
|
| 12361 |
+
{
|
| 12362 |
+
"epoch": 0.56151015837466,
|
| 12363 |
+
"grad_norm": 6.509860992431641,
|
| 12364 |
+
"learning_rate": 0.00016731227082497182,
|
| 12365 |
+
"loss": 6.4063,
|
| 12366 |
+
"step": 1755
|
| 12367 |
+
},
|
| 12368 |
+
{
|
| 12369 |
+
"epoch": 0.5618301071828508,
|
| 12370 |
+
"grad_norm": 6.6293044090271,
|
| 12371 |
+
"learning_rate": 0.00016712159106973943,
|
| 12372 |
+
"loss": 6.4052,
|
| 12373 |
+
"step": 1756
|
| 12374 |
+
},
|
| 12375 |
+
{
|
| 12376 |
+
"epoch": 0.5621500559910414,
|
| 12377 |
+
"grad_norm": 98.93505096435547,
|
| 12378 |
+
"learning_rate": 0.00016693095680689045,
|
| 12379 |
+
"loss": 6.7203,
|
| 12380 |
+
"step": 1757
|
| 12381 |
+
},
|
| 12382 |
+
{
|
| 12383 |
+
"epoch": 0.5624700047992321,
|
| 12384 |
+
"grad_norm": 6.476878643035889,
|
| 12385 |
+
"learning_rate": 0.0001667403682386806,
|
| 12386 |
+
"loss": 6.6422,
|
| 12387 |
+
"step": 1758
|
| 12388 |
+
},
|
| 12389 |
+
{
|
| 12390 |
+
"epoch": 0.5627899536074228,
|
| 12391 |
+
"grad_norm": 42.62853240966797,
|
| 12392 |
+
"learning_rate": 0.00016654982556731714,
|
| 12393 |
+
"loss": 6.5487,
|
| 12394 |
+
"step": 1759
|
| 12395 |
+
},
|
| 12396 |
+
{
|
| 12397 |
+
"epoch": 0.5631099024156135,
|
| 12398 |
+
"grad_norm": 21.046525955200195,
|
| 12399 |
+
"learning_rate": 0.00016635932899495886,
|
| 12400 |
+
"loss": 6.4535,
|
| 12401 |
+
"step": 1760
|
| 12402 |
+
},
|
| 12403 |
+
{
|
| 12404 |
+
"epoch": 0.5634298512238042,
|
| 12405 |
+
"grad_norm": 8.30802059173584,
|
| 12406 |
+
"learning_rate": 0.00016616887872371536,
|
| 12407 |
+
"loss": 6.4825,
|
| 12408 |
+
"step": 1761
|
| 12409 |
+
},
|
| 12410 |
+
{
|
| 12411 |
+
"epoch": 0.5637498000319949,
|
| 12412 |
+
"grad_norm": 8.385047912597656,
|
| 12413 |
+
"learning_rate": 0.00016597847495564724,
|
| 12414 |
+
"loss": 6.6208,
|
| 12415 |
+
"step": 1762
|
| 12416 |
+
},
|
| 12417 |
+
{
|
| 12418 |
+
"epoch": 0.5640697488401856,
|
| 12419 |
+
"grad_norm": 21.346595764160156,
|
| 12420 |
+
"learning_rate": 0.00016578811789276588,
|
| 12421 |
+
"loss": 6.4841,
|
| 12422 |
+
"step": 1763
|
| 12423 |
+
},
|
| 12424 |
+
{
|
| 12425 |
+
"epoch": 0.5643896976483762,
|
| 12426 |
+
"grad_norm": 5.010012149810791,
|
| 12427 |
+
"learning_rate": 0.00016559780773703277,
|
| 12428 |
+
"loss": 6.7083,
|
| 12429 |
+
"step": 1764
|
| 12430 |
+
},
|
| 12431 |
+
{
|
| 12432 |
+
"epoch": 0.564709646456567,
|
| 12433 |
+
"grad_norm": 4.354966640472412,
|
| 12434 |
+
"learning_rate": 0.00016540754469036005,
|
| 12435 |
+
"loss": 6.5749,
|
| 12436 |
+
"step": 1765
|
| 12437 |
+
},
|
| 12438 |
+
{
|
| 12439 |
+
"epoch": 0.5650295952647576,
|
| 12440 |
+
"grad_norm": 8.869613647460938,
|
| 12441 |
+
"learning_rate": 0.0001652173289546095,
|
| 12442 |
+
"loss": 6.4343,
|
| 12443 |
+
"step": 1766
|
| 12444 |
+
},
|
| 12445 |
+
{
|
| 12446 |
+
"epoch": 0.5653495440729484,
|
| 12447 |
+
"grad_norm": 12.243342399597168,
|
| 12448 |
+
"learning_rate": 0.00016502716073159298,
|
| 12449 |
+
"loss": 6.479,
|
| 12450 |
+
"step": 1767
|
| 12451 |
+
},
|
| 12452 |
+
{
|
| 12453 |
+
"epoch": 0.565669492881139,
|
| 12454 |
+
"grad_norm": 6.065547466278076,
|
| 12455 |
+
"learning_rate": 0.0001648370402230719,
|
| 12456 |
+
"loss": 6.8581,
|
| 12457 |
+
"step": 1768
|
| 12458 |
+
},
|
| 12459 |
+
{
|
| 12460 |
+
"epoch": 0.5659894416893297,
|
| 12461 |
+
"grad_norm": 14.517911911010742,
|
| 12462 |
+
"learning_rate": 0.00016464696763075686,
|
| 12463 |
+
"loss": 6.4405,
|
| 12464 |
+
"step": 1769
|
| 12465 |
+
},
|
| 12466 |
+
{
|
| 12467 |
+
"epoch": 0.5663093904975204,
|
| 12468 |
+
"grad_norm": 7.9618964195251465,
|
| 12469 |
+
"learning_rate": 0.00016445694315630777,
|
| 12470 |
+
"loss": 6.4442,
|
| 12471 |
+
"step": 1770
|
| 12472 |
+
},
|
| 12473 |
+
{
|
| 12474 |
+
"epoch": 0.5666293393057111,
|
| 12475 |
+
"grad_norm": 8.607887268066406,
|
| 12476 |
+
"learning_rate": 0.00016426696700133357,
|
| 12477 |
+
"loss": 6.6554,
|
| 12478 |
+
"step": 1771
|
| 12479 |
+
},
|
| 12480 |
+
{
|
| 12481 |
+
"epoch": 0.5669492881139018,
|
| 12482 |
+
"grad_norm": 7.719635486602783,
|
| 12483 |
+
"learning_rate": 0.00016407703936739172,
|
| 12484 |
+
"loss": 6.6127,
|
| 12485 |
+
"step": 1772
|
| 12486 |
+
},
|
| 12487 |
+
{
|
| 12488 |
+
"epoch": 0.5672692369220924,
|
| 12489 |
+
"grad_norm": 6.7710371017456055,
|
| 12490 |
+
"learning_rate": 0.00016388716045598832,
|
| 12491 |
+
"loss": 6.702,
|
| 12492 |
+
"step": 1773
|
| 12493 |
+
},
|
| 12494 |
+
{
|
| 12495 |
+
"epoch": 0.5675891857302832,
|
| 12496 |
+
"grad_norm": 5.212691307067871,
|
| 12497 |
+
"learning_rate": 0.00016369733046857788,
|
| 12498 |
+
"loss": 6.5324,
|
| 12499 |
+
"step": 1774
|
| 12500 |
+
},
|
| 12501 |
+
{
|
| 12502 |
+
"epoch": 0.5679091345384738,
|
| 12503 |
+
"grad_norm": 9.58516788482666,
|
| 12504 |
+
"learning_rate": 0.0001635075496065628,
|
| 12505 |
+
"loss": 6.6931,
|
| 12506 |
+
"step": 1775
|
| 12507 |
+
},
|
| 12508 |
+
{
|
| 12509 |
+
"epoch": 0.5682290833466646,
|
| 12510 |
+
"grad_norm": 6.463788986206055,
|
| 12511 |
+
"learning_rate": 0.00016331781807129355,
|
| 12512 |
+
"loss": 6.72,
|
| 12513 |
+
"step": 1776
|
| 12514 |
+
},
|
| 12515 |
+
{
|
| 12516 |
+
"epoch": 0.5685490321548552,
|
| 12517 |
+
"grad_norm": 6.6747002601623535,
|
| 12518 |
+
"learning_rate": 0.000163128136064068,
|
| 12519 |
+
"loss": 6.8406,
|
| 12520 |
+
"step": 1777
|
| 12521 |
+
},
|
| 12522 |
+
{
|
| 12523 |
+
"epoch": 0.568868980963046,
|
| 12524 |
+
"grad_norm": 22.55068016052246,
|
| 12525 |
+
"learning_rate": 0.00016293850378613177,
|
| 12526 |
+
"loss": 6.4797,
|
| 12527 |
+
"step": 1778
|
| 12528 |
+
},
|
| 12529 |
+
{
|
| 12530 |
+
"epoch": 0.5691889297712366,
|
| 12531 |
+
"grad_norm": 4.759416580200195,
|
| 12532 |
+
"learning_rate": 0.0001627489214386776,
|
| 12533 |
+
"loss": 6.6021,
|
| 12534 |
+
"step": 1779
|
| 12535 |
+
},
|
| 12536 |
+
{
|
| 12537 |
+
"epoch": 0.5695088785794273,
|
| 12538 |
+
"grad_norm": 6.074831962585449,
|
| 12539 |
+
"learning_rate": 0.0001625593892228452,
|
| 12540 |
+
"loss": 6.6699,
|
| 12541 |
+
"step": 1780
|
| 12542 |
+
},
|
| 12543 |
+
{
|
| 12544 |
+
"epoch": 0.569828827387618,
|
| 12545 |
+
"grad_norm": 5.710672855377197,
|
| 12546 |
+
"learning_rate": 0.00016236990733972114,
|
| 12547 |
+
"loss": 6.722,
|
| 12548 |
+
"step": 1781
|
| 12549 |
+
},
|
| 12550 |
+
{
|
| 12551 |
+
"epoch": 0.5701487761958086,
|
| 12552 |
+
"grad_norm": 6.164533615112305,
|
| 12553 |
+
"learning_rate": 0.00016218047599033867,
|
| 12554 |
+
"loss": 6.4717,
|
| 12555 |
+
"step": 1782
|
| 12556 |
+
},
|
| 12557 |
+
{
|
| 12558 |
+
"epoch": 0.5704687250039994,
|
| 12559 |
+
"grad_norm": 6.661571502685547,
|
| 12560 |
+
"learning_rate": 0.00016199109537567725,
|
| 12561 |
+
"loss": 6.5454,
|
| 12562 |
+
"step": 1783
|
| 12563 |
+
},
|
| 12564 |
+
{
|
| 12565 |
+
"epoch": 0.57078867381219,
|
| 12566 |
+
"grad_norm": 5.331099510192871,
|
| 12567 |
+
"learning_rate": 0.00016180176569666264,
|
| 12568 |
+
"loss": 6.4191,
|
| 12569 |
+
"step": 1784
|
| 12570 |
+
},
|
| 12571 |
+
{
|
| 12572 |
+
"epoch": 0.5711086226203808,
|
| 12573 |
+
"grad_norm": 36.22768783569336,
|
| 12574 |
+
"learning_rate": 0.00016161248715416656,
|
| 12575 |
+
"loss": 6.8651,
|
| 12576 |
+
"step": 1785
|
| 12577 |
+
},
|
| 12578 |
+
{
|
| 12579 |
+
"epoch": 0.5714285714285714,
|
| 12580 |
+
"grad_norm": 6.483286380767822,
|
| 12581 |
+
"learning_rate": 0.00016142325994900636,
|
| 12582 |
+
"loss": 6.5161,
|
| 12583 |
+
"step": 1786
|
| 12584 |
+
},
|
| 12585 |
+
{
|
| 12586 |
+
"epoch": 0.5717485202367621,
|
| 12587 |
+
"grad_norm": 7.084466457366943,
|
| 12588 |
+
"learning_rate": 0.00016123408428194512,
|
| 12589 |
+
"loss": 6.63,
|
| 12590 |
+
"step": 1787
|
| 12591 |
+
},
|
| 12592 |
+
{
|
| 12593 |
+
"epoch": 0.5720684690449528,
|
| 12594 |
+
"grad_norm": 7.926953315734863,
|
| 12595 |
+
"learning_rate": 0.00016104496035369102,
|
| 12596 |
+
"loss": 6.6144,
|
| 12597 |
+
"step": 1788
|
| 12598 |
+
},
|
| 12599 |
+
{
|
| 12600 |
+
"epoch": 0.5723884178531435,
|
| 12601 |
+
"grad_norm": 4.494540214538574,
|
| 12602 |
+
"learning_rate": 0.0001608558883648975,
|
| 12603 |
+
"loss": 6.5005,
|
| 12604 |
+
"step": 1789
|
| 12605 |
+
},
|
| 12606 |
+
{
|
| 12607 |
+
"epoch": 0.5727083666613342,
|
| 12608 |
+
"grad_norm": 7.054409503936768,
|
| 12609 |
+
"learning_rate": 0.00016066686851616292,
|
| 12610 |
+
"loss": 6.7112,
|
| 12611 |
+
"step": 1790
|
| 12612 |
+
},
|
| 12613 |
+
{
|
| 12614 |
+
"epoch": 0.5730283154695249,
|
| 12615 |
+
"grad_norm": 5.500720500946045,
|
| 12616 |
+
"learning_rate": 0.00016047790100803006,
|
| 12617 |
+
"loss": 6.6917,
|
| 12618 |
+
"step": 1791
|
| 12619 |
+
},
|
| 12620 |
+
{
|
| 12621 |
+
"epoch": 0.5733482642777156,
|
| 12622 |
+
"grad_norm": 5.7658891677856445,
|
| 12623 |
+
"learning_rate": 0.0001602889860409865,
|
| 12624 |
+
"loss": 6.4435,
|
| 12625 |
+
"step": 1792
|
| 12626 |
+
},
|
| 12627 |
+
{
|
| 12628 |
+
"epoch": 0.5736682130859062,
|
| 12629 |
+
"grad_norm": 5.84588623046875,
|
| 12630 |
+
"learning_rate": 0.00016010012381546397,
|
| 12631 |
+
"loss": 6.4001,
|
| 12632 |
+
"step": 1793
|
| 12633 |
+
},
|
| 12634 |
+
{
|
| 12635 |
+
"epoch": 0.573988161894097,
|
| 12636 |
+
"grad_norm": 6.311826229095459,
|
| 12637 |
+
"learning_rate": 0.0001599113145318381,
|
| 12638 |
+
"loss": 6.4495,
|
| 12639 |
+
"step": 1794
|
| 12640 |
+
},
|
| 12641 |
+
{
|
| 12642 |
+
"epoch": 0.5743081107022876,
|
| 12643 |
+
"grad_norm": 6.321776390075684,
|
| 12644 |
+
"learning_rate": 0.00015972255839042843,
|
| 12645 |
+
"loss": 6.4484,
|
| 12646 |
+
"step": 1795
|
| 12647 |
+
},
|
| 12648 |
+
{
|
| 12649 |
+
"epoch": 0.5746280595104784,
|
| 12650 |
+
"grad_norm": 5.284316062927246,
|
| 12651 |
+
"learning_rate": 0.00015953385559149834,
|
| 12652 |
+
"loss": 6.3737,
|
| 12653 |
+
"step": 1796
|
| 12654 |
+
},
|
| 12655 |
+
{
|
| 12656 |
+
"epoch": 0.574948008318669,
|
| 12657 |
+
"grad_norm": 38.006752014160156,
|
| 12658 |
+
"learning_rate": 0.0001593452063352542,
|
| 12659 |
+
"loss": 6.7715,
|
| 12660 |
+
"step": 1797
|
| 12661 |
+
},
|
| 12662 |
+
{
|
| 12663 |
+
"epoch": 0.5752679571268597,
|
| 12664 |
+
"grad_norm": 4.2852396965026855,
|
| 12665 |
+
"learning_rate": 0.00015915661082184596,
|
| 12666 |
+
"loss": 6.4685,
|
| 12667 |
+
"step": 1798
|
| 12668 |
+
},
|
| 12669 |
+
{
|
| 12670 |
+
"epoch": 0.5755879059350504,
|
| 12671 |
+
"grad_norm": 5.658292770385742,
|
| 12672 |
+
"learning_rate": 0.00015896806925136628,
|
| 12673 |
+
"loss": 6.4279,
|
| 12674 |
+
"step": 1799
|
| 12675 |
+
},
|
| 12676 |
+
{
|
| 12677 |
+
"epoch": 0.5759078547432411,
|
| 12678 |
+
"grad_norm": 5.834637641906738,
|
| 12679 |
+
"learning_rate": 0.00015877958182385071,
|
| 12680 |
+
"loss": 6.5669,
|
| 12681 |
+
"step": 1800
|
| 12682 |
+
},
|
| 12683 |
+
{
|
| 12684 |
+
"epoch": 0.5759078547432411,
|
| 12685 |
+
"eval_loss": 3.309910297393799,
|
| 12686 |
+
"eval_runtime": 234.1116,
|
| 12687 |
+
"eval_samples_per_second": 5.621,
|
| 12688 |
+
"eval_steps_per_second": 1.405,
|
| 12689 |
+
"step": 1800
|
| 12690 |
}
|
| 12691 |
],
|
| 12692 |
"logging_steps": 1,
|
|
|
|
| 12701 |
"early_stopping_threshold": 0.0
|
| 12702 |
},
|
| 12703 |
"attributes": {
|
| 12704 |
+
"early_stopping_patience_counter": 6
|
| 12705 |
}
|
| 12706 |
},
|
| 12707 |
"TrainerControl": {
|
|
|
|
| 12710 |
"should_evaluate": false,
|
| 12711 |
"should_log": false,
|
| 12712 |
"should_save": true,
|
| 12713 |
+
"should_training_stop": true
|
| 12714 |
},
|
| 12715 |
"attributes": {}
|
| 12716 |
}
|
| 12717 |
},
|
| 12718 |
+
"total_flos": 1.6635351854481408e+18,
|
| 12719 |
"train_batch_size": 4,
|
| 12720 |
"trial_name": null,
|
| 12721 |
"trial_params": null
|