checkpoint-72000
Browse files- config.json +36 -0
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +3503 -3
- last-checkpoint/training_args.bin +1 -1
- pytorch_model.bin +1 -1
- runs/Sep21_12-15-19_nid006869/events.out.tfevents.1758449727.nid006869.65351.0 +2 -2
- runs/Sep21_22-35-48_nid006862/events.out.tfevents.1758486957.nid006862.27495.0 +3 -0
- training_args.bin +1 -1
config.json
CHANGED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"adapter_reduction": 16,
|
| 3 |
+
"architectures": [
|
| 4 |
+
"DistillationWrapper"
|
| 5 |
+
],
|
| 6 |
+
"attention_probs_dropout_prob": 0.1,
|
| 7 |
+
"classifier_dropout": null,
|
| 8 |
+
"dtype": "float32",
|
| 9 |
+
"embedding_size": 128,
|
| 10 |
+
"expert_intermediate_size": 4096,
|
| 11 |
+
"group_depth": 4,
|
| 12 |
+
"hidden_act": "gelu",
|
| 13 |
+
"hidden_dropout_prob": 0.1,
|
| 14 |
+
"hidden_size": 1024,
|
| 15 |
+
"initializer_range": 0.02,
|
| 16 |
+
"intermediate_size": 2624,
|
| 17 |
+
"layer_norm_eps": 1e-06,
|
| 18 |
+
"load_balancing_loss_coef": 0.2,
|
| 19 |
+
"lora_alpha": 32,
|
| 20 |
+
"lora_rank": 16,
|
| 21 |
+
"max_position_embeddings": 8192,
|
| 22 |
+
"model_type": "ModernALBERT",
|
| 23 |
+
"num_attention_heads": 16,
|
| 24 |
+
"num_expert_modules": 4,
|
| 25 |
+
"num_experts": 8,
|
| 26 |
+
"num_hidden_layers": 24,
|
| 27 |
+
"pad_token_id": 0,
|
| 28 |
+
"router_jitter_noise": 0.01,
|
| 29 |
+
"top_k": 2,
|
| 30 |
+
"torch_dtype": "bfloat16",
|
| 31 |
+
"transformers_version": "4.51.3",
|
| 32 |
+
"use_adapter": true,
|
| 33 |
+
"use_cache": true,
|
| 34 |
+
"use_moa": true,
|
| 35 |
+
"vocab_size": 50368
|
| 36 |
+
}
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 715030586
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:17d5c7d5296243563494f3afea63efa21147fc119de7562020899eb9c56f1427
|
| 3 |
size 715030586
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1032262338
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:50a9561c05504384f8a5358ec95beca3b2814433f472420dc733ce1dc363e842
|
| 3 |
size 1032262338
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2ce64cd2b10065510b2a3de9f0ac207f0ad8cd288020534fda1bedb2c5d2780e
|
| 3 |
size 14960
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:78433868a2f3a72b8364e0a4caa0abbad26d6987562ec0ce4c93a648ae0d02e0
|
| 3 |
size 14960
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e165e3e4cda215fc8a379bfd6a5682d00880cf13bc9084e4cb5b9691f1c11de5
|
| 3 |
size 14960
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f60ba8297fe4b68e6f251c6d014a46bdf2808f72122cd82ba0af9d3164ed1dc9
|
| 3 |
size 14960
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:514d743b09cdf67b5f7ccba0c67283da3d20aa73a759bcf5ebfccf66234e08c8
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -46908,6 +46908,3506 @@
|
|
| 46908 |
"learning_rate": 0.0004835782880465919,
|
| 46909 |
"loss": 14.4654,
|
| 46910 |
"step": 67000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46911 |
}
|
| 46912 |
],
|
| 46913 |
"logging_steps": 10,
|
|
@@ -46927,7 +50427,7 @@
|
|
| 46927 |
"attributes": {}
|
| 46928 |
}
|
| 46929 |
},
|
| 46930 |
-
"total_flos": 1.
|
| 46931 |
"train_batch_size": 48,
|
| 46932 |
"trial_name": null,
|
| 46933 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.10665465814219437,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 72000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 46908 |
"learning_rate": 0.0004835782880465919,
|
| 46909 |
"loss": 14.4654,
|
| 46910 |
"step": 67000
|
| 46911 |
+
},
|
| 46912 |
+
{
|
| 46913 |
+
"epoch": 0.09926289780706173,
|
| 46914 |
+
"grad_norm": 5.96875,
|
| 46915 |
+
"learning_rate": 0.00048357581857908817,
|
| 46916 |
+
"loss": 14.5418,
|
| 46917 |
+
"step": 67010
|
| 46918 |
+
},
|
| 46919 |
+
{
|
| 46920 |
+
"epoch": 0.09927771095402592,
|
| 46921 |
+
"grad_norm": 9.9375,
|
| 46922 |
+
"learning_rate": 0.0004835733491115844,
|
| 46923 |
+
"loss": 14.522,
|
| 46924 |
+
"step": 67020
|
| 46925 |
+
},
|
| 46926 |
+
{
|
| 46927 |
+
"epoch": 0.09929252410099011,
|
| 46928 |
+
"grad_norm": 7.40625,
|
| 46929 |
+
"learning_rate": 0.00048357087964408056,
|
| 46930 |
+
"loss": 14.5412,
|
| 46931 |
+
"step": 67030
|
| 46932 |
+
},
|
| 46933 |
+
{
|
| 46934 |
+
"epoch": 0.0993073372479543,
|
| 46935 |
+
"grad_norm": 6.84375,
|
| 46936 |
+
"learning_rate": 0.0004835684101765768,
|
| 46937 |
+
"loss": 14.4721,
|
| 46938 |
+
"step": 67040
|
| 46939 |
+
},
|
| 46940 |
+
{
|
| 46941 |
+
"epoch": 0.0993221503949185,
|
| 46942 |
+
"grad_norm": 5.6875,
|
| 46943 |
+
"learning_rate": 0.000483565940709073,
|
| 46944 |
+
"loss": 14.5325,
|
| 46945 |
+
"step": 67050
|
| 46946 |
+
},
|
| 46947 |
+
{
|
| 46948 |
+
"epoch": 0.09933696354188269,
|
| 46949 |
+
"grad_norm": 5.84375,
|
| 46950 |
+
"learning_rate": 0.00048356347124156926,
|
| 46951 |
+
"loss": 14.4896,
|
| 46952 |
+
"step": 67060
|
| 46953 |
+
},
|
| 46954 |
+
{
|
| 46955 |
+
"epoch": 0.09935177668884689,
|
| 46956 |
+
"grad_norm": 6.46875,
|
| 46957 |
+
"learning_rate": 0.00048356100177406546,
|
| 46958 |
+
"loss": 14.4818,
|
| 46959 |
+
"step": 67070
|
| 46960 |
+
},
|
| 46961 |
+
{
|
| 46962 |
+
"epoch": 0.09936658983581108,
|
| 46963 |
+
"grad_norm": 6.28125,
|
| 46964 |
+
"learning_rate": 0.00048355853230656166,
|
| 46965 |
+
"loss": 14.6064,
|
| 46966 |
+
"step": 67080
|
| 46967 |
+
},
|
| 46968 |
+
{
|
| 46969 |
+
"epoch": 0.09938140298277527,
|
| 46970 |
+
"grad_norm": 5.96875,
|
| 46971 |
+
"learning_rate": 0.0004835560628390579,
|
| 46972 |
+
"loss": 14.4964,
|
| 46973 |
+
"step": 67090
|
| 46974 |
+
},
|
| 46975 |
+
{
|
| 46976 |
+
"epoch": 0.09939621612973946,
|
| 46977 |
+
"grad_norm": 6.03125,
|
| 46978 |
+
"learning_rate": 0.00048355359337155416,
|
| 46979 |
+
"loss": 14.5247,
|
| 46980 |
+
"step": 67100
|
| 46981 |
+
},
|
| 46982 |
+
{
|
| 46983 |
+
"epoch": 0.09941102927670366,
|
| 46984 |
+
"grad_norm": 6.125,
|
| 46985 |
+
"learning_rate": 0.0004835511239040503,
|
| 46986 |
+
"loss": 14.5124,
|
| 46987 |
+
"step": 67110
|
| 46988 |
+
},
|
| 46989 |
+
{
|
| 46990 |
+
"epoch": 0.09942584242366785,
|
| 46991 |
+
"grad_norm": 17.125,
|
| 46992 |
+
"learning_rate": 0.00048354865443654655,
|
| 46993 |
+
"loss": 14.6171,
|
| 46994 |
+
"step": 67120
|
| 46995 |
+
},
|
| 46996 |
+
{
|
| 46997 |
+
"epoch": 0.09944065557063204,
|
| 46998 |
+
"grad_norm": 6.375,
|
| 46999 |
+
"learning_rate": 0.0004835461849690428,
|
| 47000 |
+
"loss": 14.5187,
|
| 47001 |
+
"step": 67130
|
| 47002 |
+
},
|
| 47003 |
+
{
|
| 47004 |
+
"epoch": 0.09945546871759624,
|
| 47005 |
+
"grad_norm": 6.21875,
|
| 47006 |
+
"learning_rate": 0.00048354371550153895,
|
| 47007 |
+
"loss": 14.4875,
|
| 47008 |
+
"step": 67140
|
| 47009 |
+
},
|
| 47010 |
+
{
|
| 47011 |
+
"epoch": 0.09947028186456043,
|
| 47012 |
+
"grad_norm": 6.09375,
|
| 47013 |
+
"learning_rate": 0.0004835412460340352,
|
| 47014 |
+
"loss": 14.4881,
|
| 47015 |
+
"step": 67150
|
| 47016 |
+
},
|
| 47017 |
+
{
|
| 47018 |
+
"epoch": 0.09948509501152462,
|
| 47019 |
+
"grad_norm": 7.1875,
|
| 47020 |
+
"learning_rate": 0.0004835387765665314,
|
| 47021 |
+
"loss": 14.549,
|
| 47022 |
+
"step": 67160
|
| 47023 |
+
},
|
| 47024 |
+
{
|
| 47025 |
+
"epoch": 0.09949990815848882,
|
| 47026 |
+
"grad_norm": 6.5,
|
| 47027 |
+
"learning_rate": 0.00048353630709902764,
|
| 47028 |
+
"loss": 14.5416,
|
| 47029 |
+
"step": 67170
|
| 47030 |
+
},
|
| 47031 |
+
{
|
| 47032 |
+
"epoch": 0.09951472130545301,
|
| 47033 |
+
"grad_norm": 6.34375,
|
| 47034 |
+
"learning_rate": 0.00048353383763152384,
|
| 47035 |
+
"loss": 14.5421,
|
| 47036 |
+
"step": 67180
|
| 47037 |
+
},
|
| 47038 |
+
{
|
| 47039 |
+
"epoch": 0.0995295344524172,
|
| 47040 |
+
"grad_norm": 7.15625,
|
| 47041 |
+
"learning_rate": 0.00048353136816402004,
|
| 47042 |
+
"loss": 14.5221,
|
| 47043 |
+
"step": 67190
|
| 47044 |
+
},
|
| 47045 |
+
{
|
| 47046 |
+
"epoch": 0.0995443475993814,
|
| 47047 |
+
"grad_norm": 7.28125,
|
| 47048 |
+
"learning_rate": 0.0004835288986965163,
|
| 47049 |
+
"loss": 14.562,
|
| 47050 |
+
"step": 67200
|
| 47051 |
+
},
|
| 47052 |
+
{
|
| 47053 |
+
"epoch": 0.09955916074634559,
|
| 47054 |
+
"grad_norm": 6.65625,
|
| 47055 |
+
"learning_rate": 0.00048352642922901254,
|
| 47056 |
+
"loss": 14.5283,
|
| 47057 |
+
"step": 67210
|
| 47058 |
+
},
|
| 47059 |
+
{
|
| 47060 |
+
"epoch": 0.09957397389330978,
|
| 47061 |
+
"grad_norm": 6.0,
|
| 47062 |
+
"learning_rate": 0.0004835239597615087,
|
| 47063 |
+
"loss": 14.5134,
|
| 47064 |
+
"step": 67220
|
| 47065 |
+
},
|
| 47066 |
+
{
|
| 47067 |
+
"epoch": 0.09958878704027399,
|
| 47068 |
+
"grad_norm": 6.625,
|
| 47069 |
+
"learning_rate": 0.00048352149029400493,
|
| 47070 |
+
"loss": 14.4863,
|
| 47071 |
+
"step": 67230
|
| 47072 |
+
},
|
| 47073 |
+
{
|
| 47074 |
+
"epoch": 0.09960360018723818,
|
| 47075 |
+
"grad_norm": 6.875,
|
| 47076 |
+
"learning_rate": 0.0004835190208265012,
|
| 47077 |
+
"loss": 14.5705,
|
| 47078 |
+
"step": 67240
|
| 47079 |
+
},
|
| 47080 |
+
{
|
| 47081 |
+
"epoch": 0.09961841333420238,
|
| 47082 |
+
"grad_norm": 6.1875,
|
| 47083 |
+
"learning_rate": 0.0004835165513589974,
|
| 47084 |
+
"loss": 14.4729,
|
| 47085 |
+
"step": 67250
|
| 47086 |
+
},
|
| 47087 |
+
{
|
| 47088 |
+
"epoch": 0.09963322648116657,
|
| 47089 |
+
"grad_norm": 7.53125,
|
| 47090 |
+
"learning_rate": 0.0004835140818914936,
|
| 47091 |
+
"loss": 14.4941,
|
| 47092 |
+
"step": 67260
|
| 47093 |
+
},
|
| 47094 |
+
{
|
| 47095 |
+
"epoch": 0.09964803962813076,
|
| 47096 |
+
"grad_norm": 6.375,
|
| 47097 |
+
"learning_rate": 0.0004835116124239898,
|
| 47098 |
+
"loss": 14.5272,
|
| 47099 |
+
"step": 67270
|
| 47100 |
+
},
|
| 47101 |
+
{
|
| 47102 |
+
"epoch": 0.09966285277509496,
|
| 47103 |
+
"grad_norm": 6.125,
|
| 47104 |
+
"learning_rate": 0.000483509142956486,
|
| 47105 |
+
"loss": 14.5184,
|
| 47106 |
+
"step": 67280
|
| 47107 |
+
},
|
| 47108 |
+
{
|
| 47109 |
+
"epoch": 0.09967766592205915,
|
| 47110 |
+
"grad_norm": 6.6875,
|
| 47111 |
+
"learning_rate": 0.0004835066734889823,
|
| 47112 |
+
"loss": 14.5324,
|
| 47113 |
+
"step": 67290
|
| 47114 |
+
},
|
| 47115 |
+
{
|
| 47116 |
+
"epoch": 0.09969247906902334,
|
| 47117 |
+
"grad_norm": 8.625,
|
| 47118 |
+
"learning_rate": 0.0004835042040214784,
|
| 47119 |
+
"loss": 14.589,
|
| 47120 |
+
"step": 67300
|
| 47121 |
+
},
|
| 47122 |
+
{
|
| 47123 |
+
"epoch": 0.09970729221598754,
|
| 47124 |
+
"grad_norm": 6.65625,
|
| 47125 |
+
"learning_rate": 0.00048350173455397467,
|
| 47126 |
+
"loss": 14.5227,
|
| 47127 |
+
"step": 67310
|
| 47128 |
+
},
|
| 47129 |
+
{
|
| 47130 |
+
"epoch": 0.09972210536295173,
|
| 47131 |
+
"grad_norm": 15.25,
|
| 47132 |
+
"learning_rate": 0.0004834992650864709,
|
| 47133 |
+
"loss": 14.5939,
|
| 47134 |
+
"step": 67320
|
| 47135 |
+
},
|
| 47136 |
+
{
|
| 47137 |
+
"epoch": 0.09973691850991592,
|
| 47138 |
+
"grad_norm": 6.3125,
|
| 47139 |
+
"learning_rate": 0.00048349679561896706,
|
| 47140 |
+
"loss": 14.5012,
|
| 47141 |
+
"step": 67330
|
| 47142 |
+
},
|
| 47143 |
+
{
|
| 47144 |
+
"epoch": 0.09975173165688012,
|
| 47145 |
+
"grad_norm": 6.90625,
|
| 47146 |
+
"learning_rate": 0.0004834943261514633,
|
| 47147 |
+
"loss": 14.5002,
|
| 47148 |
+
"step": 67340
|
| 47149 |
+
},
|
| 47150 |
+
{
|
| 47151 |
+
"epoch": 0.09976654480384431,
|
| 47152 |
+
"grad_norm": 6.21875,
|
| 47153 |
+
"learning_rate": 0.0004834918566839595,
|
| 47154 |
+
"loss": 14.46,
|
| 47155 |
+
"step": 67350
|
| 47156 |
+
},
|
| 47157 |
+
{
|
| 47158 |
+
"epoch": 0.0997813579508085,
|
| 47159 |
+
"grad_norm": 7.5625,
|
| 47160 |
+
"learning_rate": 0.00048348938721645576,
|
| 47161 |
+
"loss": 14.5698,
|
| 47162 |
+
"step": 67360
|
| 47163 |
+
},
|
| 47164 |
+
{
|
| 47165 |
+
"epoch": 0.0997961710977727,
|
| 47166 |
+
"grad_norm": 6.125,
|
| 47167 |
+
"learning_rate": 0.00048348691774895196,
|
| 47168 |
+
"loss": 14.4558,
|
| 47169 |
+
"step": 67370
|
| 47170 |
+
},
|
| 47171 |
+
{
|
| 47172 |
+
"epoch": 0.09981098424473689,
|
| 47173 |
+
"grad_norm": 7.21875,
|
| 47174 |
+
"learning_rate": 0.00048348444828144816,
|
| 47175 |
+
"loss": 14.4794,
|
| 47176 |
+
"step": 67380
|
| 47177 |
+
},
|
| 47178 |
+
{
|
| 47179 |
+
"epoch": 0.09982579739170108,
|
| 47180 |
+
"grad_norm": 7.0,
|
| 47181 |
+
"learning_rate": 0.0004834819788139444,
|
| 47182 |
+
"loss": 14.5134,
|
| 47183 |
+
"step": 67390
|
| 47184 |
+
},
|
| 47185 |
+
{
|
| 47186 |
+
"epoch": 0.09984061053866528,
|
| 47187 |
+
"grad_norm": 6.34375,
|
| 47188 |
+
"learning_rate": 0.00048347950934644066,
|
| 47189 |
+
"loss": 14.4861,
|
| 47190 |
+
"step": 67400
|
| 47191 |
+
},
|
| 47192 |
+
{
|
| 47193 |
+
"epoch": 0.09985542368562947,
|
| 47194 |
+
"grad_norm": 5.875,
|
| 47195 |
+
"learning_rate": 0.0004834770398789368,
|
| 47196 |
+
"loss": 14.5372,
|
| 47197 |
+
"step": 67410
|
| 47198 |
+
},
|
| 47199 |
+
{
|
| 47200 |
+
"epoch": 0.09987023683259366,
|
| 47201 |
+
"grad_norm": 5.71875,
|
| 47202 |
+
"learning_rate": 0.00048347457041143305,
|
| 47203 |
+
"loss": 14.5905,
|
| 47204 |
+
"step": 67420
|
| 47205 |
+
},
|
| 47206 |
+
{
|
| 47207 |
+
"epoch": 0.09988504997955786,
|
| 47208 |
+
"grad_norm": 7.96875,
|
| 47209 |
+
"learning_rate": 0.0004834721009439293,
|
| 47210 |
+
"loss": 14.4618,
|
| 47211 |
+
"step": 67430
|
| 47212 |
+
},
|
| 47213 |
+
{
|
| 47214 |
+
"epoch": 0.09989986312652205,
|
| 47215 |
+
"grad_norm": 6.5,
|
| 47216 |
+
"learning_rate": 0.0004834696314764255,
|
| 47217 |
+
"loss": 14.5671,
|
| 47218 |
+
"step": 67440
|
| 47219 |
+
},
|
| 47220 |
+
{
|
| 47221 |
+
"epoch": 0.09991467627348624,
|
| 47222 |
+
"grad_norm": 6.65625,
|
| 47223 |
+
"learning_rate": 0.0004834671620089217,
|
| 47224 |
+
"loss": 14.5759,
|
| 47225 |
+
"step": 67450
|
| 47226 |
+
},
|
| 47227 |
+
{
|
| 47228 |
+
"epoch": 0.09992948942045043,
|
| 47229 |
+
"grad_norm": 6.53125,
|
| 47230 |
+
"learning_rate": 0.0004834646925414179,
|
| 47231 |
+
"loss": 14.4918,
|
| 47232 |
+
"step": 67460
|
| 47233 |
+
},
|
| 47234 |
+
{
|
| 47235 |
+
"epoch": 0.09994430256741463,
|
| 47236 |
+
"grad_norm": 6.0625,
|
| 47237 |
+
"learning_rate": 0.00048346222307391415,
|
| 47238 |
+
"loss": 14.4266,
|
| 47239 |
+
"step": 67470
|
| 47240 |
+
},
|
| 47241 |
+
{
|
| 47242 |
+
"epoch": 0.09995911571437882,
|
| 47243 |
+
"grad_norm": 6.8125,
|
| 47244 |
+
"learning_rate": 0.00048345975360641034,
|
| 47245 |
+
"loss": 14.5495,
|
| 47246 |
+
"step": 67480
|
| 47247 |
+
},
|
| 47248 |
+
{
|
| 47249 |
+
"epoch": 0.09997392886134301,
|
| 47250 |
+
"grad_norm": 6.1875,
|
| 47251 |
+
"learning_rate": 0.00048345728413890654,
|
| 47252 |
+
"loss": 14.431,
|
| 47253 |
+
"step": 67490
|
| 47254 |
+
},
|
| 47255 |
+
{
|
| 47256 |
+
"epoch": 0.09998874200830721,
|
| 47257 |
+
"grad_norm": 5.9375,
|
| 47258 |
+
"learning_rate": 0.0004834548146714028,
|
| 47259 |
+
"loss": 14.5487,
|
| 47260 |
+
"step": 67500
|
| 47261 |
+
},
|
| 47262 |
+
{
|
| 47263 |
+
"epoch": 0.1000035551552714,
|
| 47264 |
+
"grad_norm": 6.15625,
|
| 47265 |
+
"learning_rate": 0.00048345234520389904,
|
| 47266 |
+
"loss": 14.5167,
|
| 47267 |
+
"step": 67510
|
| 47268 |
+
},
|
| 47269 |
+
{
|
| 47270 |
+
"epoch": 0.1000183683022356,
|
| 47271 |
+
"grad_norm": 6.375,
|
| 47272 |
+
"learning_rate": 0.0004834498757363952,
|
| 47273 |
+
"loss": 14.5398,
|
| 47274 |
+
"step": 67520
|
| 47275 |
+
},
|
| 47276 |
+
{
|
| 47277 |
+
"epoch": 0.10003318144919979,
|
| 47278 |
+
"grad_norm": 9.125,
|
| 47279 |
+
"learning_rate": 0.00048344740626889143,
|
| 47280 |
+
"loss": 14.5889,
|
| 47281 |
+
"step": 67530
|
| 47282 |
+
},
|
| 47283 |
+
{
|
| 47284 |
+
"epoch": 0.100047994596164,
|
| 47285 |
+
"grad_norm": 7.40625,
|
| 47286 |
+
"learning_rate": 0.0004834449368013877,
|
| 47287 |
+
"loss": 14.4624,
|
| 47288 |
+
"step": 67540
|
| 47289 |
+
},
|
| 47290 |
+
{
|
| 47291 |
+
"epoch": 0.10006280774312819,
|
| 47292 |
+
"grad_norm": 6.125,
|
| 47293 |
+
"learning_rate": 0.0004834424673338839,
|
| 47294 |
+
"loss": 14.5035,
|
| 47295 |
+
"step": 67550
|
| 47296 |
+
},
|
| 47297 |
+
{
|
| 47298 |
+
"epoch": 0.10007762089009238,
|
| 47299 |
+
"grad_norm": 6.34375,
|
| 47300 |
+
"learning_rate": 0.0004834399978663801,
|
| 47301 |
+
"loss": 14.4799,
|
| 47302 |
+
"step": 67560
|
| 47303 |
+
},
|
| 47304 |
+
{
|
| 47305 |
+
"epoch": 0.10009243403705657,
|
| 47306 |
+
"grad_norm": 5.6875,
|
| 47307 |
+
"learning_rate": 0.0004834375283988763,
|
| 47308 |
+
"loss": 14.4791,
|
| 47309 |
+
"step": 67570
|
| 47310 |
+
},
|
| 47311 |
+
{
|
| 47312 |
+
"epoch": 0.10010724718402077,
|
| 47313 |
+
"grad_norm": 5.90625,
|
| 47314 |
+
"learning_rate": 0.0004834350589313725,
|
| 47315 |
+
"loss": 14.4452,
|
| 47316 |
+
"step": 67580
|
| 47317 |
+
},
|
| 47318 |
+
{
|
| 47319 |
+
"epoch": 0.10012206033098496,
|
| 47320 |
+
"grad_norm": 6.15625,
|
| 47321 |
+
"learning_rate": 0.0004834325894638688,
|
| 47322 |
+
"loss": 14.5724,
|
| 47323 |
+
"step": 67590
|
| 47324 |
+
},
|
| 47325 |
+
{
|
| 47326 |
+
"epoch": 0.10013687347794915,
|
| 47327 |
+
"grad_norm": 6.625,
|
| 47328 |
+
"learning_rate": 0.0004834301199963649,
|
| 47329 |
+
"loss": 14.5694,
|
| 47330 |
+
"step": 67600
|
| 47331 |
+
},
|
| 47332 |
+
{
|
| 47333 |
+
"epoch": 0.10015168662491335,
|
| 47334 |
+
"grad_norm": 5.9375,
|
| 47335 |
+
"learning_rate": 0.00048342765052886117,
|
| 47336 |
+
"loss": 14.5548,
|
| 47337 |
+
"step": 67610
|
| 47338 |
+
},
|
| 47339 |
+
{
|
| 47340 |
+
"epoch": 0.10016649977187754,
|
| 47341 |
+
"grad_norm": 5.96875,
|
| 47342 |
+
"learning_rate": 0.0004834251810613574,
|
| 47343 |
+
"loss": 14.4843,
|
| 47344 |
+
"step": 67620
|
| 47345 |
+
},
|
| 47346 |
+
{
|
| 47347 |
+
"epoch": 0.10018131291884173,
|
| 47348 |
+
"grad_norm": 6.28125,
|
| 47349 |
+
"learning_rate": 0.0004834227115938536,
|
| 47350 |
+
"loss": 14.5326,
|
| 47351 |
+
"step": 67630
|
| 47352 |
+
},
|
| 47353 |
+
{
|
| 47354 |
+
"epoch": 0.10019612606580593,
|
| 47355 |
+
"grad_norm": 6.9375,
|
| 47356 |
+
"learning_rate": 0.0004834202421263498,
|
| 47357 |
+
"loss": 14.5622,
|
| 47358 |
+
"step": 67640
|
| 47359 |
+
},
|
| 47360 |
+
{
|
| 47361 |
+
"epoch": 0.10021093921277012,
|
| 47362 |
+
"grad_norm": 5.9375,
|
| 47363 |
+
"learning_rate": 0.000483417772658846,
|
| 47364 |
+
"loss": 14.5574,
|
| 47365 |
+
"step": 67650
|
| 47366 |
+
},
|
| 47367 |
+
{
|
| 47368 |
+
"epoch": 0.10022575235973431,
|
| 47369 |
+
"grad_norm": 7.03125,
|
| 47370 |
+
"learning_rate": 0.00048341530319134226,
|
| 47371 |
+
"loss": 14.4769,
|
| 47372 |
+
"step": 67660
|
| 47373 |
+
},
|
| 47374 |
+
{
|
| 47375 |
+
"epoch": 0.1002405655066985,
|
| 47376 |
+
"grad_norm": 6.15625,
|
| 47377 |
+
"learning_rate": 0.00048341283372383846,
|
| 47378 |
+
"loss": 14.6297,
|
| 47379 |
+
"step": 67670
|
| 47380 |
+
},
|
| 47381 |
+
{
|
| 47382 |
+
"epoch": 0.1002553786536627,
|
| 47383 |
+
"grad_norm": 6.125,
|
| 47384 |
+
"learning_rate": 0.00048341036425633466,
|
| 47385 |
+
"loss": 14.5413,
|
| 47386 |
+
"step": 67680
|
| 47387 |
+
},
|
| 47388 |
+
{
|
| 47389 |
+
"epoch": 0.10027019180062689,
|
| 47390 |
+
"grad_norm": 6.3125,
|
| 47391 |
+
"learning_rate": 0.0004834078947888309,
|
| 47392 |
+
"loss": 14.5566,
|
| 47393 |
+
"step": 67690
|
| 47394 |
+
},
|
| 47395 |
+
{
|
| 47396 |
+
"epoch": 0.10028500494759109,
|
| 47397 |
+
"grad_norm": 6.1875,
|
| 47398 |
+
"learning_rate": 0.00048340542532132716,
|
| 47399 |
+
"loss": 14.5149,
|
| 47400 |
+
"step": 67700
|
| 47401 |
+
},
|
| 47402 |
+
{
|
| 47403 |
+
"epoch": 0.10029981809455528,
|
| 47404 |
+
"grad_norm": 7.15625,
|
| 47405 |
+
"learning_rate": 0.0004834029558538233,
|
| 47406 |
+
"loss": 14.5693,
|
| 47407 |
+
"step": 67710
|
| 47408 |
+
},
|
| 47409 |
+
{
|
| 47410 |
+
"epoch": 0.10031463124151947,
|
| 47411 |
+
"grad_norm": 5.8125,
|
| 47412 |
+
"learning_rate": 0.00048340048638631955,
|
| 47413 |
+
"loss": 14.5345,
|
| 47414 |
+
"step": 67720
|
| 47415 |
+
},
|
| 47416 |
+
{
|
| 47417 |
+
"epoch": 0.10032944438848367,
|
| 47418 |
+
"grad_norm": 6.53125,
|
| 47419 |
+
"learning_rate": 0.0004833980169188158,
|
| 47420 |
+
"loss": 14.5749,
|
| 47421 |
+
"step": 67730
|
| 47422 |
+
},
|
| 47423 |
+
{
|
| 47424 |
+
"epoch": 0.10034425753544786,
|
| 47425 |
+
"grad_norm": 6.53125,
|
| 47426 |
+
"learning_rate": 0.000483395547451312,
|
| 47427 |
+
"loss": 14.4721,
|
| 47428 |
+
"step": 67740
|
| 47429 |
+
},
|
| 47430 |
+
{
|
| 47431 |
+
"epoch": 0.10035907068241205,
|
| 47432 |
+
"grad_norm": 6.125,
|
| 47433 |
+
"learning_rate": 0.0004833930779838082,
|
| 47434 |
+
"loss": 14.5075,
|
| 47435 |
+
"step": 67750
|
| 47436 |
+
},
|
| 47437 |
+
{
|
| 47438 |
+
"epoch": 0.10037388382937625,
|
| 47439 |
+
"grad_norm": 6.9375,
|
| 47440 |
+
"learning_rate": 0.0004833906085163044,
|
| 47441 |
+
"loss": 14.4299,
|
| 47442 |
+
"step": 67760
|
| 47443 |
+
},
|
| 47444 |
+
{
|
| 47445 |
+
"epoch": 0.10038869697634044,
|
| 47446 |
+
"grad_norm": 6.75,
|
| 47447 |
+
"learning_rate": 0.00048338813904880065,
|
| 47448 |
+
"loss": 14.4895,
|
| 47449 |
+
"step": 67770
|
| 47450 |
+
},
|
| 47451 |
+
{
|
| 47452 |
+
"epoch": 0.10040351012330463,
|
| 47453 |
+
"grad_norm": 7.3125,
|
| 47454 |
+
"learning_rate": 0.0004833856695812969,
|
| 47455 |
+
"loss": 14.5029,
|
| 47456 |
+
"step": 67780
|
| 47457 |
+
},
|
| 47458 |
+
{
|
| 47459 |
+
"epoch": 0.10041832327026882,
|
| 47460 |
+
"grad_norm": 7.40625,
|
| 47461 |
+
"learning_rate": 0.00048338320011379304,
|
| 47462 |
+
"loss": 14.5889,
|
| 47463 |
+
"step": 67790
|
| 47464 |
+
},
|
| 47465 |
+
{
|
| 47466 |
+
"epoch": 0.10043313641723302,
|
| 47467 |
+
"grad_norm": 6.09375,
|
| 47468 |
+
"learning_rate": 0.0004833807306462893,
|
| 47469 |
+
"loss": 14.5871,
|
| 47470 |
+
"step": 67800
|
| 47471 |
+
},
|
| 47472 |
+
{
|
| 47473 |
+
"epoch": 0.10044794956419721,
|
| 47474 |
+
"grad_norm": 6.1875,
|
| 47475 |
+
"learning_rate": 0.00048337826117878554,
|
| 47476 |
+
"loss": 14.3667,
|
| 47477 |
+
"step": 67810
|
| 47478 |
+
},
|
| 47479 |
+
{
|
| 47480 |
+
"epoch": 0.1004627627111614,
|
| 47481 |
+
"grad_norm": 6.28125,
|
| 47482 |
+
"learning_rate": 0.0004833757917112817,
|
| 47483 |
+
"loss": 14.4797,
|
| 47484 |
+
"step": 67820
|
| 47485 |
+
},
|
| 47486 |
+
{
|
| 47487 |
+
"epoch": 0.1004775758581256,
|
| 47488 |
+
"grad_norm": 6.59375,
|
| 47489 |
+
"learning_rate": 0.00048337332224377793,
|
| 47490 |
+
"loss": 14.5267,
|
| 47491 |
+
"step": 67830
|
| 47492 |
+
},
|
| 47493 |
+
{
|
| 47494 |
+
"epoch": 0.10049238900508979,
|
| 47495 |
+
"grad_norm": 6.3125,
|
| 47496 |
+
"learning_rate": 0.0004833708527762742,
|
| 47497 |
+
"loss": 14.5615,
|
| 47498 |
+
"step": 67840
|
| 47499 |
+
},
|
| 47500 |
+
{
|
| 47501 |
+
"epoch": 0.10050720215205398,
|
| 47502 |
+
"grad_norm": 6.4375,
|
| 47503 |
+
"learning_rate": 0.0004833683833087704,
|
| 47504 |
+
"loss": 14.4988,
|
| 47505 |
+
"step": 67850
|
| 47506 |
+
},
|
| 47507 |
+
{
|
| 47508 |
+
"epoch": 0.10052201529901819,
|
| 47509 |
+
"grad_norm": 6.1875,
|
| 47510 |
+
"learning_rate": 0.0004833659138412666,
|
| 47511 |
+
"loss": 14.5847,
|
| 47512 |
+
"step": 67860
|
| 47513 |
+
},
|
| 47514 |
+
{
|
| 47515 |
+
"epoch": 0.10053682844598238,
|
| 47516 |
+
"grad_norm": 7.3125,
|
| 47517 |
+
"learning_rate": 0.0004833634443737628,
|
| 47518 |
+
"loss": 14.3755,
|
| 47519 |
+
"step": 67870
|
| 47520 |
+
},
|
| 47521 |
+
{
|
| 47522 |
+
"epoch": 0.10055164159294658,
|
| 47523 |
+
"grad_norm": 6.75,
|
| 47524 |
+
"learning_rate": 0.00048336097490625903,
|
| 47525 |
+
"loss": 14.4912,
|
| 47526 |
+
"step": 67880
|
| 47527 |
+
},
|
| 47528 |
+
{
|
| 47529 |
+
"epoch": 0.10056645473991077,
|
| 47530 |
+
"grad_norm": 6.0,
|
| 47531 |
+
"learning_rate": 0.0004833585054387553,
|
| 47532 |
+
"loss": 14.4245,
|
| 47533 |
+
"step": 67890
|
| 47534 |
+
},
|
| 47535 |
+
{
|
| 47536 |
+
"epoch": 0.10058126788687496,
|
| 47537 |
+
"grad_norm": 5.75,
|
| 47538 |
+
"learning_rate": 0.0004833560359712514,
|
| 47539 |
+
"loss": 14.586,
|
| 47540 |
+
"step": 67900
|
| 47541 |
+
},
|
| 47542 |
+
{
|
| 47543 |
+
"epoch": 0.10059608103383916,
|
| 47544 |
+
"grad_norm": 6.5,
|
| 47545 |
+
"learning_rate": 0.00048335356650374767,
|
| 47546 |
+
"loss": 14.4471,
|
| 47547 |
+
"step": 67910
|
| 47548 |
+
},
|
| 47549 |
+
{
|
| 47550 |
+
"epoch": 0.10061089418080335,
|
| 47551 |
+
"grad_norm": 6.53125,
|
| 47552 |
+
"learning_rate": 0.0004833510970362439,
|
| 47553 |
+
"loss": 14.5007,
|
| 47554 |
+
"step": 67920
|
| 47555 |
+
},
|
| 47556 |
+
{
|
| 47557 |
+
"epoch": 0.10062570732776754,
|
| 47558 |
+
"grad_norm": 6.90625,
|
| 47559 |
+
"learning_rate": 0.0004833486275687401,
|
| 47560 |
+
"loss": 14.5532,
|
| 47561 |
+
"step": 67930
|
| 47562 |
+
},
|
| 47563 |
+
{
|
| 47564 |
+
"epoch": 0.10064052047473174,
|
| 47565 |
+
"grad_norm": 5.8125,
|
| 47566 |
+
"learning_rate": 0.0004833461581012363,
|
| 47567 |
+
"loss": 14.4832,
|
| 47568 |
+
"step": 67940
|
| 47569 |
+
},
|
| 47570 |
+
{
|
| 47571 |
+
"epoch": 0.10065533362169593,
|
| 47572 |
+
"grad_norm": 5.8125,
|
| 47573 |
+
"learning_rate": 0.0004833436886337325,
|
| 47574 |
+
"loss": 14.4492,
|
| 47575 |
+
"step": 67950
|
| 47576 |
+
},
|
| 47577 |
+
{
|
| 47578 |
+
"epoch": 0.10067014676866012,
|
| 47579 |
+
"grad_norm": 6.40625,
|
| 47580 |
+
"learning_rate": 0.00048334121916622876,
|
| 47581 |
+
"loss": 14.4858,
|
| 47582 |
+
"step": 67960
|
| 47583 |
+
},
|
| 47584 |
+
{
|
| 47585 |
+
"epoch": 0.10068495991562432,
|
| 47586 |
+
"grad_norm": 8.9375,
|
| 47587 |
+
"learning_rate": 0.000483338749698725,
|
| 47588 |
+
"loss": 14.606,
|
| 47589 |
+
"step": 67970
|
| 47590 |
+
},
|
| 47591 |
+
{
|
| 47592 |
+
"epoch": 0.10069977306258851,
|
| 47593 |
+
"grad_norm": 6.4375,
|
| 47594 |
+
"learning_rate": 0.00048333628023122116,
|
| 47595 |
+
"loss": 14.4891,
|
| 47596 |
+
"step": 67980
|
| 47597 |
+
},
|
| 47598 |
+
{
|
| 47599 |
+
"epoch": 0.1007145862095527,
|
| 47600 |
+
"grad_norm": 6.25,
|
| 47601 |
+
"learning_rate": 0.0004833338107637174,
|
| 47602 |
+
"loss": 14.3973,
|
| 47603 |
+
"step": 67990
|
| 47604 |
+
},
|
| 47605 |
+
{
|
| 47606 |
+
"epoch": 0.1007293993565169,
|
| 47607 |
+
"grad_norm": 7.0625,
|
| 47608 |
+
"learning_rate": 0.00048333134129621366,
|
| 47609 |
+
"loss": 14.4613,
|
| 47610 |
+
"step": 68000
|
| 47611 |
+
},
|
| 47612 |
+
{
|
| 47613 |
+
"epoch": 0.10074421250348109,
|
| 47614 |
+
"grad_norm": 5.9375,
|
| 47615 |
+
"learning_rate": 0.0004833288718287098,
|
| 47616 |
+
"loss": 14.574,
|
| 47617 |
+
"step": 68010
|
| 47618 |
+
},
|
| 47619 |
+
{
|
| 47620 |
+
"epoch": 0.10075902565044528,
|
| 47621 |
+
"grad_norm": 6.4375,
|
| 47622 |
+
"learning_rate": 0.00048332640236120605,
|
| 47623 |
+
"loss": 14.378,
|
| 47624 |
+
"step": 68020
|
| 47625 |
+
},
|
| 47626 |
+
{
|
| 47627 |
+
"epoch": 0.10077383879740948,
|
| 47628 |
+
"grad_norm": 6.21875,
|
| 47629 |
+
"learning_rate": 0.0004833239328937023,
|
| 47630 |
+
"loss": 14.485,
|
| 47631 |
+
"step": 68030
|
| 47632 |
+
},
|
| 47633 |
+
{
|
| 47634 |
+
"epoch": 0.10078865194437367,
|
| 47635 |
+
"grad_norm": 7.78125,
|
| 47636 |
+
"learning_rate": 0.0004833214634261985,
|
| 47637 |
+
"loss": 14.6115,
|
| 47638 |
+
"step": 68040
|
| 47639 |
+
},
|
| 47640 |
+
{
|
| 47641 |
+
"epoch": 0.10080346509133786,
|
| 47642 |
+
"grad_norm": 6.5,
|
| 47643 |
+
"learning_rate": 0.0004833189939586947,
|
| 47644 |
+
"loss": 14.5099,
|
| 47645 |
+
"step": 68050
|
| 47646 |
+
},
|
| 47647 |
+
{
|
| 47648 |
+
"epoch": 0.10081827823830206,
|
| 47649 |
+
"grad_norm": 7.15625,
|
| 47650 |
+
"learning_rate": 0.0004833165244911909,
|
| 47651 |
+
"loss": 14.4668,
|
| 47652 |
+
"step": 68060
|
| 47653 |
+
},
|
| 47654 |
+
{
|
| 47655 |
+
"epoch": 0.10083309138526625,
|
| 47656 |
+
"grad_norm": 6.46875,
|
| 47657 |
+
"learning_rate": 0.00048331405502368715,
|
| 47658 |
+
"loss": 14.5285,
|
| 47659 |
+
"step": 68070
|
| 47660 |
+
},
|
| 47661 |
+
{
|
| 47662 |
+
"epoch": 0.10084790453223044,
|
| 47663 |
+
"grad_norm": 6.8125,
|
| 47664 |
+
"learning_rate": 0.0004833115855561834,
|
| 47665 |
+
"loss": 14.478,
|
| 47666 |
+
"step": 68080
|
| 47667 |
+
},
|
| 47668 |
+
{
|
| 47669 |
+
"epoch": 0.10086271767919464,
|
| 47670 |
+
"grad_norm": 6.28125,
|
| 47671 |
+
"learning_rate": 0.00048330911608867954,
|
| 47672 |
+
"loss": 14.467,
|
| 47673 |
+
"step": 68090
|
| 47674 |
+
},
|
| 47675 |
+
{
|
| 47676 |
+
"epoch": 0.10087753082615883,
|
| 47677 |
+
"grad_norm": 5.71875,
|
| 47678 |
+
"learning_rate": 0.0004833066466211758,
|
| 47679 |
+
"loss": 14.5151,
|
| 47680 |
+
"step": 68100
|
| 47681 |
+
},
|
| 47682 |
+
{
|
| 47683 |
+
"epoch": 0.10089234397312302,
|
| 47684 |
+
"grad_norm": 5.5,
|
| 47685 |
+
"learning_rate": 0.00048330417715367204,
|
| 47686 |
+
"loss": 14.5355,
|
| 47687 |
+
"step": 68110
|
| 47688 |
+
},
|
| 47689 |
+
{
|
| 47690 |
+
"epoch": 0.10090715712008722,
|
| 47691 |
+
"grad_norm": 6.78125,
|
| 47692 |
+
"learning_rate": 0.00048330170768616824,
|
| 47693 |
+
"loss": 14.4846,
|
| 47694 |
+
"step": 68120
|
| 47695 |
+
},
|
| 47696 |
+
{
|
| 47697 |
+
"epoch": 0.10092197026705141,
|
| 47698 |
+
"grad_norm": 5.71875,
|
| 47699 |
+
"learning_rate": 0.00048329923821866444,
|
| 47700 |
+
"loss": 14.5416,
|
| 47701 |
+
"step": 68130
|
| 47702 |
+
},
|
| 47703 |
+
{
|
| 47704 |
+
"epoch": 0.1009367834140156,
|
| 47705 |
+
"grad_norm": 7.0,
|
| 47706 |
+
"learning_rate": 0.0004832967687511607,
|
| 47707 |
+
"loss": 14.4651,
|
| 47708 |
+
"step": 68140
|
| 47709 |
+
},
|
| 47710 |
+
{
|
| 47711 |
+
"epoch": 0.1009515965609798,
|
| 47712 |
+
"grad_norm": 6.125,
|
| 47713 |
+
"learning_rate": 0.0004832942992836569,
|
| 47714 |
+
"loss": 14.5493,
|
| 47715 |
+
"step": 68150
|
| 47716 |
+
},
|
| 47717 |
+
{
|
| 47718 |
+
"epoch": 0.10096640970794399,
|
| 47719 |
+
"grad_norm": 6.5625,
|
| 47720 |
+
"learning_rate": 0.0004832918298161531,
|
| 47721 |
+
"loss": 14.4522,
|
| 47722 |
+
"step": 68160
|
| 47723 |
+
},
|
| 47724 |
+
{
|
| 47725 |
+
"epoch": 0.10098122285490818,
|
| 47726 |
+
"grad_norm": 6.0625,
|
| 47727 |
+
"learning_rate": 0.0004832893603486493,
|
| 47728 |
+
"loss": 14.4597,
|
| 47729 |
+
"step": 68170
|
| 47730 |
+
},
|
| 47731 |
+
{
|
| 47732 |
+
"epoch": 0.10099603600187239,
|
| 47733 |
+
"grad_norm": 6.84375,
|
| 47734 |
+
"learning_rate": 0.00048328689088114553,
|
| 47735 |
+
"loss": 14.5122,
|
| 47736 |
+
"step": 68180
|
| 47737 |
+
},
|
| 47738 |
+
{
|
| 47739 |
+
"epoch": 0.10101084914883658,
|
| 47740 |
+
"grad_norm": 6.75,
|
| 47741 |
+
"learning_rate": 0.0004832844214136418,
|
| 47742 |
+
"loss": 14.4309,
|
| 47743 |
+
"step": 68190
|
| 47744 |
+
},
|
| 47745 |
+
{
|
| 47746 |
+
"epoch": 0.10102566229580077,
|
| 47747 |
+
"grad_norm": 5.78125,
|
| 47748 |
+
"learning_rate": 0.0004832819519461379,
|
| 47749 |
+
"loss": 14.5667,
|
| 47750 |
+
"step": 68200
|
| 47751 |
+
},
|
| 47752 |
+
{
|
| 47753 |
+
"epoch": 0.10104047544276497,
|
| 47754 |
+
"grad_norm": 5.875,
|
| 47755 |
+
"learning_rate": 0.00048327948247863417,
|
| 47756 |
+
"loss": 14.4851,
|
| 47757 |
+
"step": 68210
|
| 47758 |
+
},
|
| 47759 |
+
{
|
| 47760 |
+
"epoch": 0.10105528858972916,
|
| 47761 |
+
"grad_norm": 7.8125,
|
| 47762 |
+
"learning_rate": 0.0004832770130111304,
|
| 47763 |
+
"loss": 14.5788,
|
| 47764 |
+
"step": 68220
|
| 47765 |
+
},
|
| 47766 |
+
{
|
| 47767 |
+
"epoch": 0.10107010173669335,
|
| 47768 |
+
"grad_norm": 6.625,
|
| 47769 |
+
"learning_rate": 0.0004832745435436266,
|
| 47770 |
+
"loss": 14.4581,
|
| 47771 |
+
"step": 68230
|
| 47772 |
+
},
|
| 47773 |
+
{
|
| 47774 |
+
"epoch": 0.10108491488365755,
|
| 47775 |
+
"grad_norm": 8.6875,
|
| 47776 |
+
"learning_rate": 0.0004832720740761228,
|
| 47777 |
+
"loss": 14.4109,
|
| 47778 |
+
"step": 68240
|
| 47779 |
+
},
|
| 47780 |
+
{
|
| 47781 |
+
"epoch": 0.10109972803062174,
|
| 47782 |
+
"grad_norm": 6.15625,
|
| 47783 |
+
"learning_rate": 0.000483269604608619,
|
| 47784 |
+
"loss": 14.5737,
|
| 47785 |
+
"step": 68250
|
| 47786 |
+
},
|
| 47787 |
+
{
|
| 47788 |
+
"epoch": 0.10111454117758593,
|
| 47789 |
+
"grad_norm": 7.5625,
|
| 47790 |
+
"learning_rate": 0.00048326713514111526,
|
| 47791 |
+
"loss": 14.5496,
|
| 47792 |
+
"step": 68260
|
| 47793 |
+
},
|
| 47794 |
+
{
|
| 47795 |
+
"epoch": 0.10112935432455013,
|
| 47796 |
+
"grad_norm": 6.09375,
|
| 47797 |
+
"learning_rate": 0.0004832646656736115,
|
| 47798 |
+
"loss": 14.4273,
|
| 47799 |
+
"step": 68270
|
| 47800 |
+
},
|
| 47801 |
+
{
|
| 47802 |
+
"epoch": 0.10114416747151432,
|
| 47803 |
+
"grad_norm": 8.0,
|
| 47804 |
+
"learning_rate": 0.00048326219620610766,
|
| 47805 |
+
"loss": 14.4684,
|
| 47806 |
+
"step": 68280
|
| 47807 |
+
},
|
| 47808 |
+
{
|
| 47809 |
+
"epoch": 0.10115898061847851,
|
| 47810 |
+
"grad_norm": 7.0625,
|
| 47811 |
+
"learning_rate": 0.0004832597267386039,
|
| 47812 |
+
"loss": 14.5188,
|
| 47813 |
+
"step": 68290
|
| 47814 |
+
},
|
| 47815 |
+
{
|
| 47816 |
+
"epoch": 0.10117379376544271,
|
| 47817 |
+
"grad_norm": 10.5,
|
| 47818 |
+
"learning_rate": 0.00048325725727110016,
|
| 47819 |
+
"loss": 14.5008,
|
| 47820 |
+
"step": 68300
|
| 47821 |
+
},
|
| 47822 |
+
{
|
| 47823 |
+
"epoch": 0.1011886069124069,
|
| 47824 |
+
"grad_norm": 5.46875,
|
| 47825 |
+
"learning_rate": 0.0004832547878035963,
|
| 47826 |
+
"loss": 14.6179,
|
| 47827 |
+
"step": 68310
|
| 47828 |
+
},
|
| 47829 |
+
{
|
| 47830 |
+
"epoch": 0.1012034200593711,
|
| 47831 |
+
"grad_norm": 6.46875,
|
| 47832 |
+
"learning_rate": 0.00048325231833609255,
|
| 47833 |
+
"loss": 14.4873,
|
| 47834 |
+
"step": 68320
|
| 47835 |
+
},
|
| 47836 |
+
{
|
| 47837 |
+
"epoch": 0.10121823320633529,
|
| 47838 |
+
"grad_norm": 6.6875,
|
| 47839 |
+
"learning_rate": 0.0004832498488685888,
|
| 47840 |
+
"loss": 14.4202,
|
| 47841 |
+
"step": 68330
|
| 47842 |
+
},
|
| 47843 |
+
{
|
| 47844 |
+
"epoch": 0.10123304635329948,
|
| 47845 |
+
"grad_norm": 8.25,
|
| 47846 |
+
"learning_rate": 0.000483247379401085,
|
| 47847 |
+
"loss": 14.4236,
|
| 47848 |
+
"step": 68340
|
| 47849 |
+
},
|
| 47850 |
+
{
|
| 47851 |
+
"epoch": 0.10124785950026367,
|
| 47852 |
+
"grad_norm": 6.21875,
|
| 47853 |
+
"learning_rate": 0.0004832449099335812,
|
| 47854 |
+
"loss": 14.5156,
|
| 47855 |
+
"step": 68350
|
| 47856 |
+
},
|
| 47857 |
+
{
|
| 47858 |
+
"epoch": 0.10126267264722787,
|
| 47859 |
+
"grad_norm": 6.0625,
|
| 47860 |
+
"learning_rate": 0.0004832424404660774,
|
| 47861 |
+
"loss": 14.4645,
|
| 47862 |
+
"step": 68360
|
| 47863 |
+
},
|
| 47864 |
+
{
|
| 47865 |
+
"epoch": 0.10127748579419206,
|
| 47866 |
+
"grad_norm": 5.78125,
|
| 47867 |
+
"learning_rate": 0.00048323997099857365,
|
| 47868 |
+
"loss": 14.5254,
|
| 47869 |
+
"step": 68370
|
| 47870 |
+
},
|
| 47871 |
+
{
|
| 47872 |
+
"epoch": 0.10129229894115625,
|
| 47873 |
+
"grad_norm": 8.3125,
|
| 47874 |
+
"learning_rate": 0.0004832375015310699,
|
| 47875 |
+
"loss": 14.474,
|
| 47876 |
+
"step": 68380
|
| 47877 |
+
},
|
| 47878 |
+
{
|
| 47879 |
+
"epoch": 0.10130711208812045,
|
| 47880 |
+
"grad_norm": 7.5625,
|
| 47881 |
+
"learning_rate": 0.00048323503206356604,
|
| 47882 |
+
"loss": 14.4725,
|
| 47883 |
+
"step": 68390
|
| 47884 |
+
},
|
| 47885 |
+
{
|
| 47886 |
+
"epoch": 0.10132192523508464,
|
| 47887 |
+
"grad_norm": 6.28125,
|
| 47888 |
+
"learning_rate": 0.0004832325625960623,
|
| 47889 |
+
"loss": 14.5528,
|
| 47890 |
+
"step": 68400
|
| 47891 |
+
},
|
| 47892 |
+
{
|
| 47893 |
+
"epoch": 0.10133673838204883,
|
| 47894 |
+
"grad_norm": 6.4375,
|
| 47895 |
+
"learning_rate": 0.00048323009312855854,
|
| 47896 |
+
"loss": 14.4089,
|
| 47897 |
+
"step": 68410
|
| 47898 |
+
},
|
| 47899 |
+
{
|
| 47900 |
+
"epoch": 0.10135155152901303,
|
| 47901 |
+
"grad_norm": 6.5625,
|
| 47902 |
+
"learning_rate": 0.00048322762366105474,
|
| 47903 |
+
"loss": 14.5647,
|
| 47904 |
+
"step": 68420
|
| 47905 |
+
},
|
| 47906 |
+
{
|
| 47907 |
+
"epoch": 0.10136636467597722,
|
| 47908 |
+
"grad_norm": 7.21875,
|
| 47909 |
+
"learning_rate": 0.00048322515419355094,
|
| 47910 |
+
"loss": 14.528,
|
| 47911 |
+
"step": 68430
|
| 47912 |
+
},
|
| 47913 |
+
{
|
| 47914 |
+
"epoch": 0.10138117782294141,
|
| 47915 |
+
"grad_norm": 6.25,
|
| 47916 |
+
"learning_rate": 0.0004832226847260472,
|
| 47917 |
+
"loss": 14.4197,
|
| 47918 |
+
"step": 68440
|
| 47919 |
+
},
|
| 47920 |
+
{
|
| 47921 |
+
"epoch": 0.1013959909699056,
|
| 47922 |
+
"grad_norm": 6.3125,
|
| 47923 |
+
"learning_rate": 0.0004832202152585434,
|
| 47924 |
+
"loss": 14.5149,
|
| 47925 |
+
"step": 68450
|
| 47926 |
+
},
|
| 47927 |
+
{
|
| 47928 |
+
"epoch": 0.1014108041168698,
|
| 47929 |
+
"grad_norm": 6.8125,
|
| 47930 |
+
"learning_rate": 0.00048321774579103963,
|
| 47931 |
+
"loss": 14.5349,
|
| 47932 |
+
"step": 68460
|
| 47933 |
+
},
|
| 47934 |
+
{
|
| 47935 |
+
"epoch": 0.10142561726383399,
|
| 47936 |
+
"grad_norm": 5.84375,
|
| 47937 |
+
"learning_rate": 0.0004832152763235358,
|
| 47938 |
+
"loss": 14.4364,
|
| 47939 |
+
"step": 68470
|
| 47940 |
+
},
|
| 47941 |
+
{
|
| 47942 |
+
"epoch": 0.10144043041079819,
|
| 47943 |
+
"grad_norm": 6.4375,
|
| 47944 |
+
"learning_rate": 0.00048321280685603203,
|
| 47945 |
+
"loss": 14.5028,
|
| 47946 |
+
"step": 68480
|
| 47947 |
+
},
|
| 47948 |
+
{
|
| 47949 |
+
"epoch": 0.10145524355776238,
|
| 47950 |
+
"grad_norm": 6.625,
|
| 47951 |
+
"learning_rate": 0.0004832103373885283,
|
| 47952 |
+
"loss": 14.554,
|
| 47953 |
+
"step": 68490
|
| 47954 |
+
},
|
| 47955 |
+
{
|
| 47956 |
+
"epoch": 0.10147005670472659,
|
| 47957 |
+
"grad_norm": 6.0625,
|
| 47958 |
+
"learning_rate": 0.0004832078679210244,
|
| 47959 |
+
"loss": 14.5277,
|
| 47960 |
+
"step": 68500
|
| 47961 |
+
},
|
| 47962 |
+
{
|
| 47963 |
+
"epoch": 0.10148486985169078,
|
| 47964 |
+
"grad_norm": 6.21875,
|
| 47965 |
+
"learning_rate": 0.00048320539845352067,
|
| 47966 |
+
"loss": 14.4506,
|
| 47967 |
+
"step": 68510
|
| 47968 |
+
},
|
| 47969 |
+
{
|
| 47970 |
+
"epoch": 0.10149968299865497,
|
| 47971 |
+
"grad_norm": 5.90625,
|
| 47972 |
+
"learning_rate": 0.0004832029289860169,
|
| 47973 |
+
"loss": 14.4963,
|
| 47974 |
+
"step": 68520
|
| 47975 |
+
},
|
| 47976 |
+
{
|
| 47977 |
+
"epoch": 0.10151449614561917,
|
| 47978 |
+
"grad_norm": 5.8125,
|
| 47979 |
+
"learning_rate": 0.0004832004595185131,
|
| 47980 |
+
"loss": 14.3869,
|
| 47981 |
+
"step": 68530
|
| 47982 |
+
},
|
| 47983 |
+
{
|
| 47984 |
+
"epoch": 0.10152930929258336,
|
| 47985 |
+
"grad_norm": 6.40625,
|
| 47986 |
+
"learning_rate": 0.0004831979900510093,
|
| 47987 |
+
"loss": 14.5387,
|
| 47988 |
+
"step": 68540
|
| 47989 |
+
},
|
| 47990 |
+
{
|
| 47991 |
+
"epoch": 0.10154412243954755,
|
| 47992 |
+
"grad_norm": 6.875,
|
| 47993 |
+
"learning_rate": 0.0004831955205835055,
|
| 47994 |
+
"loss": 14.4626,
|
| 47995 |
+
"step": 68550
|
| 47996 |
+
},
|
| 47997 |
+
{
|
| 47998 |
+
"epoch": 0.10155893558651174,
|
| 47999 |
+
"grad_norm": 5.59375,
|
| 48000 |
+
"learning_rate": 0.00048319305111600176,
|
| 48001 |
+
"loss": 14.4996,
|
| 48002 |
+
"step": 68560
|
| 48003 |
+
},
|
| 48004 |
+
{
|
| 48005 |
+
"epoch": 0.10157374873347594,
|
| 48006 |
+
"grad_norm": 6.21875,
|
| 48007 |
+
"learning_rate": 0.000483190581648498,
|
| 48008 |
+
"loss": 14.4373,
|
| 48009 |
+
"step": 68570
|
| 48010 |
+
},
|
| 48011 |
+
{
|
| 48012 |
+
"epoch": 0.10158856188044013,
|
| 48013 |
+
"grad_norm": 7.21875,
|
| 48014 |
+
"learning_rate": 0.00048318811218099416,
|
| 48015 |
+
"loss": 14.4643,
|
| 48016 |
+
"step": 68580
|
| 48017 |
+
},
|
| 48018 |
+
{
|
| 48019 |
+
"epoch": 0.10160337502740432,
|
| 48020 |
+
"grad_norm": 6.625,
|
| 48021 |
+
"learning_rate": 0.0004831856427134904,
|
| 48022 |
+
"loss": 14.5462,
|
| 48023 |
+
"step": 68590
|
| 48024 |
+
},
|
| 48025 |
+
{
|
| 48026 |
+
"epoch": 0.10161818817436852,
|
| 48027 |
+
"grad_norm": 7.0,
|
| 48028 |
+
"learning_rate": 0.00048318317324598666,
|
| 48029 |
+
"loss": 14.5107,
|
| 48030 |
+
"step": 68600
|
| 48031 |
+
},
|
| 48032 |
+
{
|
| 48033 |
+
"epoch": 0.10163300132133271,
|
| 48034 |
+
"grad_norm": 5.84375,
|
| 48035 |
+
"learning_rate": 0.00048318070377848286,
|
| 48036 |
+
"loss": 14.5248,
|
| 48037 |
+
"step": 68610
|
| 48038 |
+
},
|
| 48039 |
+
{
|
| 48040 |
+
"epoch": 0.1016478144682969,
|
| 48041 |
+
"grad_norm": 7.0,
|
| 48042 |
+
"learning_rate": 0.00048317823431097905,
|
| 48043 |
+
"loss": 14.4153,
|
| 48044 |
+
"step": 68620
|
| 48045 |
+
},
|
| 48046 |
+
{
|
| 48047 |
+
"epoch": 0.1016626276152611,
|
| 48048 |
+
"grad_norm": 6.03125,
|
| 48049 |
+
"learning_rate": 0.0004831757648434753,
|
| 48050 |
+
"loss": 14.4449,
|
| 48051 |
+
"step": 68630
|
| 48052 |
+
},
|
| 48053 |
+
{
|
| 48054 |
+
"epoch": 0.10167744076222529,
|
| 48055 |
+
"grad_norm": 7.8125,
|
| 48056 |
+
"learning_rate": 0.0004831732953759715,
|
| 48057 |
+
"loss": 14.3906,
|
| 48058 |
+
"step": 68640
|
| 48059 |
+
},
|
| 48060 |
+
{
|
| 48061 |
+
"epoch": 0.10169225390918948,
|
| 48062 |
+
"grad_norm": 5.6875,
|
| 48063 |
+
"learning_rate": 0.00048317082590846775,
|
| 48064 |
+
"loss": 14.5081,
|
| 48065 |
+
"step": 68650
|
| 48066 |
+
},
|
| 48067 |
+
{
|
| 48068 |
+
"epoch": 0.10170706705615368,
|
| 48069 |
+
"grad_norm": 13.0,
|
| 48070 |
+
"learning_rate": 0.0004831683564409639,
|
| 48071 |
+
"loss": 14.4611,
|
| 48072 |
+
"step": 68660
|
| 48073 |
+
},
|
| 48074 |
+
{
|
| 48075 |
+
"epoch": 0.10172188020311787,
|
| 48076 |
+
"grad_norm": 7.3125,
|
| 48077 |
+
"learning_rate": 0.00048316588697346015,
|
| 48078 |
+
"loss": 14.494,
|
| 48079 |
+
"step": 68670
|
| 48080 |
+
},
|
| 48081 |
+
{
|
| 48082 |
+
"epoch": 0.10173669335008206,
|
| 48083 |
+
"grad_norm": 5.875,
|
| 48084 |
+
"learning_rate": 0.0004831634175059564,
|
| 48085 |
+
"loss": 14.5145,
|
| 48086 |
+
"step": 68680
|
| 48087 |
+
},
|
| 48088 |
+
{
|
| 48089 |
+
"epoch": 0.10175150649704626,
|
| 48090 |
+
"grad_norm": 6.0625,
|
| 48091 |
+
"learning_rate": 0.00048316094803845254,
|
| 48092 |
+
"loss": 14.5251,
|
| 48093 |
+
"step": 68690
|
| 48094 |
+
},
|
| 48095 |
+
{
|
| 48096 |
+
"epoch": 0.10176631964401045,
|
| 48097 |
+
"grad_norm": 6.34375,
|
| 48098 |
+
"learning_rate": 0.0004831584785709488,
|
| 48099 |
+
"loss": 14.523,
|
| 48100 |
+
"step": 68700
|
| 48101 |
+
},
|
| 48102 |
+
{
|
| 48103 |
+
"epoch": 0.10178113279097464,
|
| 48104 |
+
"grad_norm": 7.25,
|
| 48105 |
+
"learning_rate": 0.00048315600910344504,
|
| 48106 |
+
"loss": 14.5185,
|
| 48107 |
+
"step": 68710
|
| 48108 |
+
},
|
| 48109 |
+
{
|
| 48110 |
+
"epoch": 0.10179594593793884,
|
| 48111 |
+
"grad_norm": 7.09375,
|
| 48112 |
+
"learning_rate": 0.00048315353963594124,
|
| 48113 |
+
"loss": 14.4535,
|
| 48114 |
+
"step": 68720
|
| 48115 |
+
},
|
| 48116 |
+
{
|
| 48117 |
+
"epoch": 0.10181075908490303,
|
| 48118 |
+
"grad_norm": 6.6875,
|
| 48119 |
+
"learning_rate": 0.00048315107016843744,
|
| 48120 |
+
"loss": 14.4577,
|
| 48121 |
+
"step": 68730
|
| 48122 |
+
},
|
| 48123 |
+
{
|
| 48124 |
+
"epoch": 0.10182557223186722,
|
| 48125 |
+
"grad_norm": 6.375,
|
| 48126 |
+
"learning_rate": 0.0004831486007009337,
|
| 48127 |
+
"loss": 14.3694,
|
| 48128 |
+
"step": 68740
|
| 48129 |
+
},
|
| 48130 |
+
{
|
| 48131 |
+
"epoch": 0.10184038537883142,
|
| 48132 |
+
"grad_norm": 6.65625,
|
| 48133 |
+
"learning_rate": 0.0004831461312334299,
|
| 48134 |
+
"loss": 14.4843,
|
| 48135 |
+
"step": 68750
|
| 48136 |
+
},
|
| 48137 |
+
{
|
| 48138 |
+
"epoch": 0.10185519852579561,
|
| 48139 |
+
"grad_norm": 6.4375,
|
| 48140 |
+
"learning_rate": 0.00048314366176592613,
|
| 48141 |
+
"loss": 14.5038,
|
| 48142 |
+
"step": 68760
|
| 48143 |
+
},
|
| 48144 |
+
{
|
| 48145 |
+
"epoch": 0.1018700116727598,
|
| 48146 |
+
"grad_norm": 6.40625,
|
| 48147 |
+
"learning_rate": 0.0004831411922984223,
|
| 48148 |
+
"loss": 14.4611,
|
| 48149 |
+
"step": 68770
|
| 48150 |
+
},
|
| 48151 |
+
{
|
| 48152 |
+
"epoch": 0.101884824819724,
|
| 48153 |
+
"grad_norm": 6.3125,
|
| 48154 |
+
"learning_rate": 0.00048313872283091853,
|
| 48155 |
+
"loss": 14.4145,
|
| 48156 |
+
"step": 68780
|
| 48157 |
+
},
|
| 48158 |
+
{
|
| 48159 |
+
"epoch": 0.10189963796668819,
|
| 48160 |
+
"grad_norm": 6.0625,
|
| 48161 |
+
"learning_rate": 0.0004831362533634148,
|
| 48162 |
+
"loss": 14.3738,
|
| 48163 |
+
"step": 68790
|
| 48164 |
+
},
|
| 48165 |
+
{
|
| 48166 |
+
"epoch": 0.10191445111365238,
|
| 48167 |
+
"grad_norm": 6.9375,
|
| 48168 |
+
"learning_rate": 0.000483133783895911,
|
| 48169 |
+
"loss": 14.4142,
|
| 48170 |
+
"step": 68800
|
| 48171 |
+
},
|
| 48172 |
+
{
|
| 48173 |
+
"epoch": 0.10192926426061658,
|
| 48174 |
+
"grad_norm": 6.34375,
|
| 48175 |
+
"learning_rate": 0.00048313131442840717,
|
| 48176 |
+
"loss": 14.4577,
|
| 48177 |
+
"step": 68810
|
| 48178 |
+
},
|
| 48179 |
+
{
|
| 48180 |
+
"epoch": 0.10194407740758078,
|
| 48181 |
+
"grad_norm": 6.96875,
|
| 48182 |
+
"learning_rate": 0.0004831288449609034,
|
| 48183 |
+
"loss": 14.4871,
|
| 48184 |
+
"step": 68820
|
| 48185 |
+
},
|
| 48186 |
+
{
|
| 48187 |
+
"epoch": 0.10195889055454498,
|
| 48188 |
+
"grad_norm": 6.1875,
|
| 48189 |
+
"learning_rate": 0.0004831263754933996,
|
| 48190 |
+
"loss": 14.4809,
|
| 48191 |
+
"step": 68830
|
| 48192 |
+
},
|
| 48193 |
+
{
|
| 48194 |
+
"epoch": 0.10197370370150917,
|
| 48195 |
+
"grad_norm": 5.8125,
|
| 48196 |
+
"learning_rate": 0.0004831239060258958,
|
| 48197 |
+
"loss": 14.4981,
|
| 48198 |
+
"step": 68840
|
| 48199 |
+
},
|
| 48200 |
+
{
|
| 48201 |
+
"epoch": 0.10198851684847336,
|
| 48202 |
+
"grad_norm": 26.375,
|
| 48203 |
+
"learning_rate": 0.000483121436558392,
|
| 48204 |
+
"loss": 14.4678,
|
| 48205 |
+
"step": 68850
|
| 48206 |
+
},
|
| 48207 |
+
{
|
| 48208 |
+
"epoch": 0.10200332999543756,
|
| 48209 |
+
"grad_norm": 8.0,
|
| 48210 |
+
"learning_rate": 0.00048311896709088827,
|
| 48211 |
+
"loss": 14.5394,
|
| 48212 |
+
"step": 68860
|
| 48213 |
+
},
|
| 48214 |
+
{
|
| 48215 |
+
"epoch": 0.10201814314240175,
|
| 48216 |
+
"grad_norm": 6.75,
|
| 48217 |
+
"learning_rate": 0.0004831164976233845,
|
| 48218 |
+
"loss": 14.4764,
|
| 48219 |
+
"step": 68870
|
| 48220 |
+
},
|
| 48221 |
+
{
|
| 48222 |
+
"epoch": 0.10203295628936594,
|
| 48223 |
+
"grad_norm": 6.90625,
|
| 48224 |
+
"learning_rate": 0.00048311402815588066,
|
| 48225 |
+
"loss": 14.5575,
|
| 48226 |
+
"step": 68880
|
| 48227 |
+
},
|
| 48228 |
+
{
|
| 48229 |
+
"epoch": 0.10204776943633014,
|
| 48230 |
+
"grad_norm": 6.3125,
|
| 48231 |
+
"learning_rate": 0.0004831115586883769,
|
| 48232 |
+
"loss": 14.5167,
|
| 48233 |
+
"step": 68890
|
| 48234 |
+
},
|
| 48235 |
+
{
|
| 48236 |
+
"epoch": 0.10206258258329433,
|
| 48237 |
+
"grad_norm": 6.875,
|
| 48238 |
+
"learning_rate": 0.00048310908922087316,
|
| 48239 |
+
"loss": 14.4134,
|
| 48240 |
+
"step": 68900
|
| 48241 |
+
},
|
| 48242 |
+
{
|
| 48243 |
+
"epoch": 0.10207739573025852,
|
| 48244 |
+
"grad_norm": 6.09375,
|
| 48245 |
+
"learning_rate": 0.00048310661975336936,
|
| 48246 |
+
"loss": 14.4839,
|
| 48247 |
+
"step": 68910
|
| 48248 |
+
},
|
| 48249 |
+
{
|
| 48250 |
+
"epoch": 0.10209220887722271,
|
| 48251 |
+
"grad_norm": 7.0,
|
| 48252 |
+
"learning_rate": 0.00048310415028586555,
|
| 48253 |
+
"loss": 14.4033,
|
| 48254 |
+
"step": 68920
|
| 48255 |
+
},
|
| 48256 |
+
{
|
| 48257 |
+
"epoch": 0.10210702202418691,
|
| 48258 |
+
"grad_norm": 5.875,
|
| 48259 |
+
"learning_rate": 0.0004831016808183618,
|
| 48260 |
+
"loss": 14.4383,
|
| 48261 |
+
"step": 68930
|
| 48262 |
+
},
|
| 48263 |
+
{
|
| 48264 |
+
"epoch": 0.1021218351711511,
|
| 48265 |
+
"grad_norm": 6.75,
|
| 48266 |
+
"learning_rate": 0.000483099211350858,
|
| 48267 |
+
"loss": 14.4953,
|
| 48268 |
+
"step": 68940
|
| 48269 |
+
},
|
| 48270 |
+
{
|
| 48271 |
+
"epoch": 0.1021366483181153,
|
| 48272 |
+
"grad_norm": 6.15625,
|
| 48273 |
+
"learning_rate": 0.00048309674188335425,
|
| 48274 |
+
"loss": 14.5184,
|
| 48275 |
+
"step": 68950
|
| 48276 |
+
},
|
| 48277 |
+
{
|
| 48278 |
+
"epoch": 0.10215146146507949,
|
| 48279 |
+
"grad_norm": 7.90625,
|
| 48280 |
+
"learning_rate": 0.0004830942724158504,
|
| 48281 |
+
"loss": 14.4386,
|
| 48282 |
+
"step": 68960
|
| 48283 |
+
},
|
| 48284 |
+
{
|
| 48285 |
+
"epoch": 0.10216627461204368,
|
| 48286 |
+
"grad_norm": 6.03125,
|
| 48287 |
+
"learning_rate": 0.00048309180294834665,
|
| 48288 |
+
"loss": 14.5343,
|
| 48289 |
+
"step": 68970
|
| 48290 |
+
},
|
| 48291 |
+
{
|
| 48292 |
+
"epoch": 0.10218108775900787,
|
| 48293 |
+
"grad_norm": 6.71875,
|
| 48294 |
+
"learning_rate": 0.0004830893334808429,
|
| 48295 |
+
"loss": 14.549,
|
| 48296 |
+
"step": 68980
|
| 48297 |
+
},
|
| 48298 |
+
{
|
| 48299 |
+
"epoch": 0.10219590090597207,
|
| 48300 |
+
"grad_norm": 6.4375,
|
| 48301 |
+
"learning_rate": 0.00048308686401333904,
|
| 48302 |
+
"loss": 14.4445,
|
| 48303 |
+
"step": 68990
|
| 48304 |
+
},
|
| 48305 |
+
{
|
| 48306 |
+
"epoch": 0.10221071405293626,
|
| 48307 |
+
"grad_norm": 7.5,
|
| 48308 |
+
"learning_rate": 0.0004830843945458353,
|
| 48309 |
+
"loss": 14.5447,
|
| 48310 |
+
"step": 69000
|
| 48311 |
+
},
|
| 48312 |
+
{
|
| 48313 |
+
"epoch": 0.10222552719990045,
|
| 48314 |
+
"grad_norm": 5.25,
|
| 48315 |
+
"learning_rate": 0.00048308192507833154,
|
| 48316 |
+
"loss": 14.3885,
|
| 48317 |
+
"step": 69010
|
| 48318 |
+
},
|
| 48319 |
+
{
|
| 48320 |
+
"epoch": 0.10224034034686465,
|
| 48321 |
+
"grad_norm": 6.40625,
|
| 48322 |
+
"learning_rate": 0.00048307945561082774,
|
| 48323 |
+
"loss": 14.4071,
|
| 48324 |
+
"step": 69020
|
| 48325 |
+
},
|
| 48326 |
+
{
|
| 48327 |
+
"epoch": 0.10225515349382884,
|
| 48328 |
+
"grad_norm": 6.84375,
|
| 48329 |
+
"learning_rate": 0.00048307698614332394,
|
| 48330 |
+
"loss": 14.4863,
|
| 48331 |
+
"step": 69030
|
| 48332 |
+
},
|
| 48333 |
+
{
|
| 48334 |
+
"epoch": 0.10226996664079303,
|
| 48335 |
+
"grad_norm": 6.78125,
|
| 48336 |
+
"learning_rate": 0.0004830745166758202,
|
| 48337 |
+
"loss": 14.4963,
|
| 48338 |
+
"step": 69040
|
| 48339 |
+
},
|
| 48340 |
+
{
|
| 48341 |
+
"epoch": 0.10228477978775723,
|
| 48342 |
+
"grad_norm": 8.625,
|
| 48343 |
+
"learning_rate": 0.0004830720472083164,
|
| 48344 |
+
"loss": 14.4571,
|
| 48345 |
+
"step": 69050
|
| 48346 |
+
},
|
| 48347 |
+
{
|
| 48348 |
+
"epoch": 0.10229959293472142,
|
| 48349 |
+
"grad_norm": 7.53125,
|
| 48350 |
+
"learning_rate": 0.00048306957774081263,
|
| 48351 |
+
"loss": 14.4907,
|
| 48352 |
+
"step": 69060
|
| 48353 |
+
},
|
| 48354 |
+
{
|
| 48355 |
+
"epoch": 0.10231440608168561,
|
| 48356 |
+
"grad_norm": 6.4375,
|
| 48357 |
+
"learning_rate": 0.0004830671082733088,
|
| 48358 |
+
"loss": 14.4523,
|
| 48359 |
+
"step": 69070
|
| 48360 |
+
},
|
| 48361 |
+
{
|
| 48362 |
+
"epoch": 0.1023292192286498,
|
| 48363 |
+
"grad_norm": 6.625,
|
| 48364 |
+
"learning_rate": 0.00048306463880580503,
|
| 48365 |
+
"loss": 14.436,
|
| 48366 |
+
"step": 69080
|
| 48367 |
+
},
|
| 48368 |
+
{
|
| 48369 |
+
"epoch": 0.102344032375614,
|
| 48370 |
+
"grad_norm": 7.0,
|
| 48371 |
+
"learning_rate": 0.0004830621693383013,
|
| 48372 |
+
"loss": 14.5049,
|
| 48373 |
+
"step": 69090
|
| 48374 |
+
},
|
| 48375 |
+
{
|
| 48376 |
+
"epoch": 0.10235884552257819,
|
| 48377 |
+
"grad_norm": 6.15625,
|
| 48378 |
+
"learning_rate": 0.0004830596998707975,
|
| 48379 |
+
"loss": 14.3927,
|
| 48380 |
+
"step": 69100
|
| 48381 |
+
},
|
| 48382 |
+
{
|
| 48383 |
+
"epoch": 0.10237365866954239,
|
| 48384 |
+
"grad_norm": 5.78125,
|
| 48385 |
+
"learning_rate": 0.0004830572304032937,
|
| 48386 |
+
"loss": 14.5091,
|
| 48387 |
+
"step": 69110
|
| 48388 |
+
},
|
| 48389 |
+
{
|
| 48390 |
+
"epoch": 0.10238847181650658,
|
| 48391 |
+
"grad_norm": 6.125,
|
| 48392 |
+
"learning_rate": 0.0004830547609357899,
|
| 48393 |
+
"loss": 14.4833,
|
| 48394 |
+
"step": 69120
|
| 48395 |
+
},
|
| 48396 |
+
{
|
| 48397 |
+
"epoch": 0.10240328496347079,
|
| 48398 |
+
"grad_norm": 6.625,
|
| 48399 |
+
"learning_rate": 0.0004830522914682861,
|
| 48400 |
+
"loss": 14.5416,
|
| 48401 |
+
"step": 69130
|
| 48402 |
+
},
|
| 48403 |
+
{
|
| 48404 |
+
"epoch": 0.10241809811043498,
|
| 48405 |
+
"grad_norm": 21.125,
|
| 48406 |
+
"learning_rate": 0.00048304982200078237,
|
| 48407 |
+
"loss": 14.4866,
|
| 48408 |
+
"step": 69140
|
| 48409 |
+
},
|
| 48410 |
+
{
|
| 48411 |
+
"epoch": 0.10243291125739917,
|
| 48412 |
+
"grad_norm": 6.84375,
|
| 48413 |
+
"learning_rate": 0.0004830473525332785,
|
| 48414 |
+
"loss": 14.5278,
|
| 48415 |
+
"step": 69150
|
| 48416 |
+
},
|
| 48417 |
+
{
|
| 48418 |
+
"epoch": 0.10244772440436337,
|
| 48419 |
+
"grad_norm": 8.4375,
|
| 48420 |
+
"learning_rate": 0.00048304488306577477,
|
| 48421 |
+
"loss": 14.5336,
|
| 48422 |
+
"step": 69160
|
| 48423 |
+
},
|
| 48424 |
+
{
|
| 48425 |
+
"epoch": 0.10246253755132756,
|
| 48426 |
+
"grad_norm": 7.1875,
|
| 48427 |
+
"learning_rate": 0.000483042413598271,
|
| 48428 |
+
"loss": 14.4245,
|
| 48429 |
+
"step": 69170
|
| 48430 |
+
},
|
| 48431 |
+
{
|
| 48432 |
+
"epoch": 0.10247735069829175,
|
| 48433 |
+
"grad_norm": 5.9375,
|
| 48434 |
+
"learning_rate": 0.00048303994413076716,
|
| 48435 |
+
"loss": 14.4922,
|
| 48436 |
+
"step": 69180
|
| 48437 |
+
},
|
| 48438 |
+
{
|
| 48439 |
+
"epoch": 0.10249216384525595,
|
| 48440 |
+
"grad_norm": 6.71875,
|
| 48441 |
+
"learning_rate": 0.0004830374746632634,
|
| 48442 |
+
"loss": 14.428,
|
| 48443 |
+
"step": 69190
|
| 48444 |
+
},
|
| 48445 |
+
{
|
| 48446 |
+
"epoch": 0.10250697699222014,
|
| 48447 |
+
"grad_norm": 9.0,
|
| 48448 |
+
"learning_rate": 0.00048303500519575966,
|
| 48449 |
+
"loss": 14.469,
|
| 48450 |
+
"step": 69200
|
| 48451 |
+
},
|
| 48452 |
+
{
|
| 48453 |
+
"epoch": 0.10252179013918433,
|
| 48454 |
+
"grad_norm": 5.1875,
|
| 48455 |
+
"learning_rate": 0.00048303253572825586,
|
| 48456 |
+
"loss": 14.4531,
|
| 48457 |
+
"step": 69210
|
| 48458 |
+
},
|
| 48459 |
+
{
|
| 48460 |
+
"epoch": 0.10253660328614853,
|
| 48461 |
+
"grad_norm": 7.625,
|
| 48462 |
+
"learning_rate": 0.00048303006626075205,
|
| 48463 |
+
"loss": 14.4411,
|
| 48464 |
+
"step": 69220
|
| 48465 |
+
},
|
| 48466 |
+
{
|
| 48467 |
+
"epoch": 0.10255141643311272,
|
| 48468 |
+
"grad_norm": 6.59375,
|
| 48469 |
+
"learning_rate": 0.0004830275967932483,
|
| 48470 |
+
"loss": 14.4179,
|
| 48471 |
+
"step": 69230
|
| 48472 |
+
},
|
| 48473 |
+
{
|
| 48474 |
+
"epoch": 0.10256622958007691,
|
| 48475 |
+
"grad_norm": 7.1875,
|
| 48476 |
+
"learning_rate": 0.0004830251273257445,
|
| 48477 |
+
"loss": 14.4906,
|
| 48478 |
+
"step": 69240
|
| 48479 |
+
},
|
| 48480 |
+
{
|
| 48481 |
+
"epoch": 0.1025810427270411,
|
| 48482 |
+
"grad_norm": 6.6875,
|
| 48483 |
+
"learning_rate": 0.00048302265785824075,
|
| 48484 |
+
"loss": 14.3611,
|
| 48485 |
+
"step": 69250
|
| 48486 |
+
},
|
| 48487 |
+
{
|
| 48488 |
+
"epoch": 0.1025958558740053,
|
| 48489 |
+
"grad_norm": 5.9375,
|
| 48490 |
+
"learning_rate": 0.0004830201883907369,
|
| 48491 |
+
"loss": 14.4967,
|
| 48492 |
+
"step": 69260
|
| 48493 |
+
},
|
| 48494 |
+
{
|
| 48495 |
+
"epoch": 0.10261066902096949,
|
| 48496 |
+
"grad_norm": 7.5625,
|
| 48497 |
+
"learning_rate": 0.00048301771892323315,
|
| 48498 |
+
"loss": 14.4009,
|
| 48499 |
+
"step": 69270
|
| 48500 |
+
},
|
| 48501 |
+
{
|
| 48502 |
+
"epoch": 0.10262548216793368,
|
| 48503 |
+
"grad_norm": 5.71875,
|
| 48504 |
+
"learning_rate": 0.0004830152494557294,
|
| 48505 |
+
"loss": 14.4834,
|
| 48506 |
+
"step": 69280
|
| 48507 |
+
},
|
| 48508 |
+
{
|
| 48509 |
+
"epoch": 0.10264029531489788,
|
| 48510 |
+
"grad_norm": 6.5,
|
| 48511 |
+
"learning_rate": 0.0004830127799882256,
|
| 48512 |
+
"loss": 14.449,
|
| 48513 |
+
"step": 69290
|
| 48514 |
+
},
|
| 48515 |
+
{
|
| 48516 |
+
"epoch": 0.10265510846186207,
|
| 48517 |
+
"grad_norm": 6.375,
|
| 48518 |
+
"learning_rate": 0.0004830103105207218,
|
| 48519 |
+
"loss": 14.4789,
|
| 48520 |
+
"step": 69300
|
| 48521 |
+
},
|
| 48522 |
+
{
|
| 48523 |
+
"epoch": 0.10266992160882626,
|
| 48524 |
+
"grad_norm": 6.21875,
|
| 48525 |
+
"learning_rate": 0.00048300784105321804,
|
| 48526 |
+
"loss": 14.5274,
|
| 48527 |
+
"step": 69310
|
| 48528 |
+
},
|
| 48529 |
+
{
|
| 48530 |
+
"epoch": 0.10268473475579046,
|
| 48531 |
+
"grad_norm": 6.78125,
|
| 48532 |
+
"learning_rate": 0.00048300537158571424,
|
| 48533 |
+
"loss": 14.499,
|
| 48534 |
+
"step": 69320
|
| 48535 |
+
},
|
| 48536 |
+
{
|
| 48537 |
+
"epoch": 0.10269954790275465,
|
| 48538 |
+
"grad_norm": 6.375,
|
| 48539 |
+
"learning_rate": 0.00048300290211821044,
|
| 48540 |
+
"loss": 14.4059,
|
| 48541 |
+
"step": 69330
|
| 48542 |
+
},
|
| 48543 |
+
{
|
| 48544 |
+
"epoch": 0.10271436104971884,
|
| 48545 |
+
"grad_norm": 6.6875,
|
| 48546 |
+
"learning_rate": 0.0004830004326507067,
|
| 48547 |
+
"loss": 14.3767,
|
| 48548 |
+
"step": 69340
|
| 48549 |
+
},
|
| 48550 |
+
{
|
| 48551 |
+
"epoch": 0.10272917419668304,
|
| 48552 |
+
"grad_norm": 5.90625,
|
| 48553 |
+
"learning_rate": 0.0004829979631832029,
|
| 48554 |
+
"loss": 14.3247,
|
| 48555 |
+
"step": 69350
|
| 48556 |
+
},
|
| 48557 |
+
{
|
| 48558 |
+
"epoch": 0.10274398734364723,
|
| 48559 |
+
"grad_norm": 7.34375,
|
| 48560 |
+
"learning_rate": 0.00048299549371569914,
|
| 48561 |
+
"loss": 14.4829,
|
| 48562 |
+
"step": 69360
|
| 48563 |
+
},
|
| 48564 |
+
{
|
| 48565 |
+
"epoch": 0.10275880049061142,
|
| 48566 |
+
"grad_norm": 5.65625,
|
| 48567 |
+
"learning_rate": 0.0004829930242481953,
|
| 48568 |
+
"loss": 14.3248,
|
| 48569 |
+
"step": 69370
|
| 48570 |
+
},
|
| 48571 |
+
{
|
| 48572 |
+
"epoch": 0.10277361363757562,
|
| 48573 |
+
"grad_norm": 7.09375,
|
| 48574 |
+
"learning_rate": 0.00048299055478069153,
|
| 48575 |
+
"loss": 14.415,
|
| 48576 |
+
"step": 69380
|
| 48577 |
+
},
|
| 48578 |
+
{
|
| 48579 |
+
"epoch": 0.10278842678453981,
|
| 48580 |
+
"grad_norm": 6.96875,
|
| 48581 |
+
"learning_rate": 0.0004829880853131878,
|
| 48582 |
+
"loss": 14.4884,
|
| 48583 |
+
"step": 69390
|
| 48584 |
+
},
|
| 48585 |
+
{
|
| 48586 |
+
"epoch": 0.102803239931504,
|
| 48587 |
+
"grad_norm": 6.5,
|
| 48588 |
+
"learning_rate": 0.000482985615845684,
|
| 48589 |
+
"loss": 14.5135,
|
| 48590 |
+
"step": 69400
|
| 48591 |
+
},
|
| 48592 |
+
{
|
| 48593 |
+
"epoch": 0.1028180530784682,
|
| 48594 |
+
"grad_norm": 7.65625,
|
| 48595 |
+
"learning_rate": 0.0004829831463781802,
|
| 48596 |
+
"loss": 14.496,
|
| 48597 |
+
"step": 69410
|
| 48598 |
+
},
|
| 48599 |
+
{
|
| 48600 |
+
"epoch": 0.10283286622543239,
|
| 48601 |
+
"grad_norm": 5.78125,
|
| 48602 |
+
"learning_rate": 0.0004829806769106764,
|
| 48603 |
+
"loss": 14.4923,
|
| 48604 |
+
"step": 69420
|
| 48605 |
+
},
|
| 48606 |
+
{
|
| 48607 |
+
"epoch": 0.10284767937239658,
|
| 48608 |
+
"grad_norm": 11.1875,
|
| 48609 |
+
"learning_rate": 0.0004829782074431726,
|
| 48610 |
+
"loss": 14.5196,
|
| 48611 |
+
"step": 69430
|
| 48612 |
+
},
|
| 48613 |
+
{
|
| 48614 |
+
"epoch": 0.10286249251936078,
|
| 48615 |
+
"grad_norm": 8.625,
|
| 48616 |
+
"learning_rate": 0.00048297573797566887,
|
| 48617 |
+
"loss": 14.3853,
|
| 48618 |
+
"step": 69440
|
| 48619 |
+
},
|
| 48620 |
+
{
|
| 48621 |
+
"epoch": 0.10287730566632498,
|
| 48622 |
+
"grad_norm": 18.125,
|
| 48623 |
+
"learning_rate": 0.000482973268508165,
|
| 48624 |
+
"loss": 14.5263,
|
| 48625 |
+
"step": 69450
|
| 48626 |
+
},
|
| 48627 |
+
{
|
| 48628 |
+
"epoch": 0.10289211881328918,
|
| 48629 |
+
"grad_norm": 6.15625,
|
| 48630 |
+
"learning_rate": 0.00048297079904066127,
|
| 48631 |
+
"loss": 14.5711,
|
| 48632 |
+
"step": 69460
|
| 48633 |
+
},
|
| 48634 |
+
{
|
| 48635 |
+
"epoch": 0.10290693196025337,
|
| 48636 |
+
"grad_norm": 7.3125,
|
| 48637 |
+
"learning_rate": 0.0004829683295731575,
|
| 48638 |
+
"loss": 14.4885,
|
| 48639 |
+
"step": 69470
|
| 48640 |
+
},
|
| 48641 |
+
{
|
| 48642 |
+
"epoch": 0.10292174510721756,
|
| 48643 |
+
"grad_norm": 5.875,
|
| 48644 |
+
"learning_rate": 0.0004829658601056537,
|
| 48645 |
+
"loss": 14.4192,
|
| 48646 |
+
"step": 69480
|
| 48647 |
+
},
|
| 48648 |
+
{
|
| 48649 |
+
"epoch": 0.10293655825418176,
|
| 48650 |
+
"grad_norm": 6.78125,
|
| 48651 |
+
"learning_rate": 0.0004829633906381499,
|
| 48652 |
+
"loss": 14.4574,
|
| 48653 |
+
"step": 69490
|
| 48654 |
+
},
|
| 48655 |
+
{
|
| 48656 |
+
"epoch": 0.10295137140114595,
|
| 48657 |
+
"grad_norm": 6.875,
|
| 48658 |
+
"learning_rate": 0.00048296092117064616,
|
| 48659 |
+
"loss": 14.4896,
|
| 48660 |
+
"step": 69500
|
| 48661 |
+
},
|
| 48662 |
+
{
|
| 48663 |
+
"epoch": 0.10296618454811014,
|
| 48664 |
+
"grad_norm": 6.34375,
|
| 48665 |
+
"learning_rate": 0.00048295845170314236,
|
| 48666 |
+
"loss": 14.4557,
|
| 48667 |
+
"step": 69510
|
| 48668 |
+
},
|
| 48669 |
+
{
|
| 48670 |
+
"epoch": 0.10298099769507434,
|
| 48671 |
+
"grad_norm": 6.15625,
|
| 48672 |
+
"learning_rate": 0.00048295598223563855,
|
| 48673 |
+
"loss": 14.4576,
|
| 48674 |
+
"step": 69520
|
| 48675 |
+
},
|
| 48676 |
+
{
|
| 48677 |
+
"epoch": 0.10299581084203853,
|
| 48678 |
+
"grad_norm": 6.3125,
|
| 48679 |
+
"learning_rate": 0.0004829535127681348,
|
| 48680 |
+
"loss": 14.4308,
|
| 48681 |
+
"step": 69530
|
| 48682 |
+
},
|
| 48683 |
+
{
|
| 48684 |
+
"epoch": 0.10301062398900272,
|
| 48685 |
+
"grad_norm": 6.46875,
|
| 48686 |
+
"learning_rate": 0.000482951043300631,
|
| 48687 |
+
"loss": 14.4991,
|
| 48688 |
+
"step": 69540
|
| 48689 |
+
},
|
| 48690 |
+
{
|
| 48691 |
+
"epoch": 0.10302543713596692,
|
| 48692 |
+
"grad_norm": 6.1875,
|
| 48693 |
+
"learning_rate": 0.00048294857383312725,
|
| 48694 |
+
"loss": 14.4775,
|
| 48695 |
+
"step": 69550
|
| 48696 |
+
},
|
| 48697 |
+
{
|
| 48698 |
+
"epoch": 0.10304025028293111,
|
| 48699 |
+
"grad_norm": 6.1875,
|
| 48700 |
+
"learning_rate": 0.0004829461043656234,
|
| 48701 |
+
"loss": 14.4991,
|
| 48702 |
+
"step": 69560
|
| 48703 |
+
},
|
| 48704 |
+
{
|
| 48705 |
+
"epoch": 0.1030550634298953,
|
| 48706 |
+
"grad_norm": 6.4375,
|
| 48707 |
+
"learning_rate": 0.00048294363489811965,
|
| 48708 |
+
"loss": 14.4996,
|
| 48709 |
+
"step": 69570
|
| 48710 |
+
},
|
| 48711 |
+
{
|
| 48712 |
+
"epoch": 0.1030698765768595,
|
| 48713 |
+
"grad_norm": 6.09375,
|
| 48714 |
+
"learning_rate": 0.0004829411654306159,
|
| 48715 |
+
"loss": 14.4103,
|
| 48716 |
+
"step": 69580
|
| 48717 |
+
},
|
| 48718 |
+
{
|
| 48719 |
+
"epoch": 0.10308468972382369,
|
| 48720 |
+
"grad_norm": 5.90625,
|
| 48721 |
+
"learning_rate": 0.0004829386959631121,
|
| 48722 |
+
"loss": 14.433,
|
| 48723 |
+
"step": 69590
|
| 48724 |
+
},
|
| 48725 |
+
{
|
| 48726 |
+
"epoch": 0.10309950287078788,
|
| 48727 |
+
"grad_norm": 6.0625,
|
| 48728 |
+
"learning_rate": 0.0004829362264956083,
|
| 48729 |
+
"loss": 14.3899,
|
| 48730 |
+
"step": 69600
|
| 48731 |
+
},
|
| 48732 |
+
{
|
| 48733 |
+
"epoch": 0.10311431601775208,
|
| 48734 |
+
"grad_norm": 6.375,
|
| 48735 |
+
"learning_rate": 0.00048293375702810454,
|
| 48736 |
+
"loss": 14.3371,
|
| 48737 |
+
"step": 69610
|
| 48738 |
+
},
|
| 48739 |
+
{
|
| 48740 |
+
"epoch": 0.10312912916471627,
|
| 48741 |
+
"grad_norm": 6.5625,
|
| 48742 |
+
"learning_rate": 0.00048293128756060074,
|
| 48743 |
+
"loss": 14.4299,
|
| 48744 |
+
"step": 69620
|
| 48745 |
+
},
|
| 48746 |
+
{
|
| 48747 |
+
"epoch": 0.10314394231168046,
|
| 48748 |
+
"grad_norm": 6.375,
|
| 48749 |
+
"learning_rate": 0.000482928818093097,
|
| 48750 |
+
"loss": 14.4829,
|
| 48751 |
+
"step": 69630
|
| 48752 |
+
},
|
| 48753 |
+
{
|
| 48754 |
+
"epoch": 0.10315875545864465,
|
| 48755 |
+
"grad_norm": 6.875,
|
| 48756 |
+
"learning_rate": 0.0004829263486255932,
|
| 48757 |
+
"loss": 14.5233,
|
| 48758 |
+
"step": 69640
|
| 48759 |
+
},
|
| 48760 |
+
{
|
| 48761 |
+
"epoch": 0.10317356860560885,
|
| 48762 |
+
"grad_norm": 7.0,
|
| 48763 |
+
"learning_rate": 0.0004829238791580894,
|
| 48764 |
+
"loss": 14.4704,
|
| 48765 |
+
"step": 69650
|
| 48766 |
+
},
|
| 48767 |
+
{
|
| 48768 |
+
"epoch": 0.10318838175257304,
|
| 48769 |
+
"grad_norm": 5.53125,
|
| 48770 |
+
"learning_rate": 0.00048292140969058564,
|
| 48771 |
+
"loss": 14.4576,
|
| 48772 |
+
"step": 69660
|
| 48773 |
+
},
|
| 48774 |
+
{
|
| 48775 |
+
"epoch": 0.10320319489953723,
|
| 48776 |
+
"grad_norm": 6.5,
|
| 48777 |
+
"learning_rate": 0.0004829189402230818,
|
| 48778 |
+
"loss": 14.4769,
|
| 48779 |
+
"step": 69670
|
| 48780 |
+
},
|
| 48781 |
+
{
|
| 48782 |
+
"epoch": 0.10321800804650143,
|
| 48783 |
+
"grad_norm": 5.625,
|
| 48784 |
+
"learning_rate": 0.00048291647075557803,
|
| 48785 |
+
"loss": 14.4472,
|
| 48786 |
+
"step": 69680
|
| 48787 |
+
},
|
| 48788 |
+
{
|
| 48789 |
+
"epoch": 0.10323282119346562,
|
| 48790 |
+
"grad_norm": 8.8125,
|
| 48791 |
+
"learning_rate": 0.0004829140012880743,
|
| 48792 |
+
"loss": 14.3798,
|
| 48793 |
+
"step": 69690
|
| 48794 |
+
},
|
| 48795 |
+
{
|
| 48796 |
+
"epoch": 0.10324763434042981,
|
| 48797 |
+
"grad_norm": 14.625,
|
| 48798 |
+
"learning_rate": 0.0004829115318205705,
|
| 48799 |
+
"loss": 14.3795,
|
| 48800 |
+
"step": 69700
|
| 48801 |
+
},
|
| 48802 |
+
{
|
| 48803 |
+
"epoch": 0.10326244748739401,
|
| 48804 |
+
"grad_norm": 7.125,
|
| 48805 |
+
"learning_rate": 0.0004829090623530667,
|
| 48806 |
+
"loss": 14.5236,
|
| 48807 |
+
"step": 69710
|
| 48808 |
+
},
|
| 48809 |
+
{
|
| 48810 |
+
"epoch": 0.1032772606343582,
|
| 48811 |
+
"grad_norm": 6.125,
|
| 48812 |
+
"learning_rate": 0.0004829065928855629,
|
| 48813 |
+
"loss": 14.4358,
|
| 48814 |
+
"step": 69720
|
| 48815 |
+
},
|
| 48816 |
+
{
|
| 48817 |
+
"epoch": 0.1032920737813224,
|
| 48818 |
+
"grad_norm": 5.8125,
|
| 48819 |
+
"learning_rate": 0.0004829041234180591,
|
| 48820 |
+
"loss": 14.5151,
|
| 48821 |
+
"step": 69730
|
| 48822 |
+
},
|
| 48823 |
+
{
|
| 48824 |
+
"epoch": 0.10330688692828659,
|
| 48825 |
+
"grad_norm": 7.0625,
|
| 48826 |
+
"learning_rate": 0.00048290165395055537,
|
| 48827 |
+
"loss": 14.4811,
|
| 48828 |
+
"step": 69740
|
| 48829 |
+
},
|
| 48830 |
+
{
|
| 48831 |
+
"epoch": 0.10332170007525078,
|
| 48832 |
+
"grad_norm": 6.53125,
|
| 48833 |
+
"learning_rate": 0.0004828991844830515,
|
| 48834 |
+
"loss": 14.4398,
|
| 48835 |
+
"step": 69750
|
| 48836 |
+
},
|
| 48837 |
+
{
|
| 48838 |
+
"epoch": 0.10333651322221497,
|
| 48839 |
+
"grad_norm": 7.65625,
|
| 48840 |
+
"learning_rate": 0.00048289671501554777,
|
| 48841 |
+
"loss": 14.4433,
|
| 48842 |
+
"step": 69760
|
| 48843 |
+
},
|
| 48844 |
+
{
|
| 48845 |
+
"epoch": 0.10335132636917918,
|
| 48846 |
+
"grad_norm": 5.96875,
|
| 48847 |
+
"learning_rate": 0.000482894245548044,
|
| 48848 |
+
"loss": 14.5156,
|
| 48849 |
+
"step": 69770
|
| 48850 |
+
},
|
| 48851 |
+
{
|
| 48852 |
+
"epoch": 0.10336613951614337,
|
| 48853 |
+
"grad_norm": 6.5625,
|
| 48854 |
+
"learning_rate": 0.0004828917760805402,
|
| 48855 |
+
"loss": 14.5456,
|
| 48856 |
+
"step": 69780
|
| 48857 |
+
},
|
| 48858 |
+
{
|
| 48859 |
+
"epoch": 0.10338095266310757,
|
| 48860 |
+
"grad_norm": 5.9375,
|
| 48861 |
+
"learning_rate": 0.0004828893066130364,
|
| 48862 |
+
"loss": 14.4749,
|
| 48863 |
+
"step": 69790
|
| 48864 |
+
},
|
| 48865 |
+
{
|
| 48866 |
+
"epoch": 0.10339576581007176,
|
| 48867 |
+
"grad_norm": 6.1875,
|
| 48868 |
+
"learning_rate": 0.00048288683714553266,
|
| 48869 |
+
"loss": 14.3895,
|
| 48870 |
+
"step": 69800
|
| 48871 |
+
},
|
| 48872 |
+
{
|
| 48873 |
+
"epoch": 0.10341057895703595,
|
| 48874 |
+
"grad_norm": 6.53125,
|
| 48875 |
+
"learning_rate": 0.00048288436767802886,
|
| 48876 |
+
"loss": 14.4719,
|
| 48877 |
+
"step": 69810
|
| 48878 |
+
},
|
| 48879 |
+
{
|
| 48880 |
+
"epoch": 0.10342539210400015,
|
| 48881 |
+
"grad_norm": 6.6875,
|
| 48882 |
+
"learning_rate": 0.0004828818982105251,
|
| 48883 |
+
"loss": 14.439,
|
| 48884 |
+
"step": 69820
|
| 48885 |
+
},
|
| 48886 |
+
{
|
| 48887 |
+
"epoch": 0.10344020525096434,
|
| 48888 |
+
"grad_norm": 7.1875,
|
| 48889 |
+
"learning_rate": 0.0004828794287430213,
|
| 48890 |
+
"loss": 14.4329,
|
| 48891 |
+
"step": 69830
|
| 48892 |
+
},
|
| 48893 |
+
{
|
| 48894 |
+
"epoch": 0.10345501839792853,
|
| 48895 |
+
"grad_norm": 5.40625,
|
| 48896 |
+
"learning_rate": 0.0004828769592755175,
|
| 48897 |
+
"loss": 14.4353,
|
| 48898 |
+
"step": 69840
|
| 48899 |
+
},
|
| 48900 |
+
{
|
| 48901 |
+
"epoch": 0.10346983154489273,
|
| 48902 |
+
"grad_norm": 8.0625,
|
| 48903 |
+
"learning_rate": 0.00048287448980801375,
|
| 48904 |
+
"loss": 14.4982,
|
| 48905 |
+
"step": 69850
|
| 48906 |
+
},
|
| 48907 |
+
{
|
| 48908 |
+
"epoch": 0.10348464469185692,
|
| 48909 |
+
"grad_norm": 6.78125,
|
| 48910 |
+
"learning_rate": 0.0004828720203405099,
|
| 48911 |
+
"loss": 14.4344,
|
| 48912 |
+
"step": 69860
|
| 48913 |
+
},
|
| 48914 |
+
{
|
| 48915 |
+
"epoch": 0.10349945783882111,
|
| 48916 |
+
"grad_norm": 7.3125,
|
| 48917 |
+
"learning_rate": 0.00048286955087300615,
|
| 48918 |
+
"loss": 14.4419,
|
| 48919 |
+
"step": 69870
|
| 48920 |
+
},
|
| 48921 |
+
{
|
| 48922 |
+
"epoch": 0.1035142709857853,
|
| 48923 |
+
"grad_norm": 6.65625,
|
| 48924 |
+
"learning_rate": 0.0004828670814055024,
|
| 48925 |
+
"loss": 14.4333,
|
| 48926 |
+
"step": 69880
|
| 48927 |
+
},
|
| 48928 |
+
{
|
| 48929 |
+
"epoch": 0.1035290841327495,
|
| 48930 |
+
"grad_norm": 6.375,
|
| 48931 |
+
"learning_rate": 0.0004828646119379986,
|
| 48932 |
+
"loss": 14.3719,
|
| 48933 |
+
"step": 69890
|
| 48934 |
+
},
|
| 48935 |
+
{
|
| 48936 |
+
"epoch": 0.10354389727971369,
|
| 48937 |
+
"grad_norm": 8.5,
|
| 48938 |
+
"learning_rate": 0.0004828621424704948,
|
| 48939 |
+
"loss": 14.4832,
|
| 48940 |
+
"step": 69900
|
| 48941 |
+
},
|
| 48942 |
+
{
|
| 48943 |
+
"epoch": 0.10355871042667789,
|
| 48944 |
+
"grad_norm": 7.03125,
|
| 48945 |
+
"learning_rate": 0.00048285967300299104,
|
| 48946 |
+
"loss": 14.4382,
|
| 48947 |
+
"step": 69910
|
| 48948 |
+
},
|
| 48949 |
+
{
|
| 48950 |
+
"epoch": 0.10357352357364208,
|
| 48951 |
+
"grad_norm": 6.875,
|
| 48952 |
+
"learning_rate": 0.00048285720353548724,
|
| 48953 |
+
"loss": 14.417,
|
| 48954 |
+
"step": 69920
|
| 48955 |
+
},
|
| 48956 |
+
{
|
| 48957 |
+
"epoch": 0.10358833672060627,
|
| 48958 |
+
"grad_norm": 6.1875,
|
| 48959 |
+
"learning_rate": 0.0004828547340679835,
|
| 48960 |
+
"loss": 14.4657,
|
| 48961 |
+
"step": 69930
|
| 48962 |
+
},
|
| 48963 |
+
{
|
| 48964 |
+
"epoch": 0.10360314986757047,
|
| 48965 |
+
"grad_norm": 5.875,
|
| 48966 |
+
"learning_rate": 0.0004828522646004797,
|
| 48967 |
+
"loss": 14.3622,
|
| 48968 |
+
"step": 69940
|
| 48969 |
+
},
|
| 48970 |
+
{
|
| 48971 |
+
"epoch": 0.10361796301453466,
|
| 48972 |
+
"grad_norm": 6.28125,
|
| 48973 |
+
"learning_rate": 0.0004828497951329759,
|
| 48974 |
+
"loss": 14.421,
|
| 48975 |
+
"step": 69950
|
| 48976 |
+
},
|
| 48977 |
+
{
|
| 48978 |
+
"epoch": 0.10363277616149885,
|
| 48979 |
+
"grad_norm": 6.125,
|
| 48980 |
+
"learning_rate": 0.00048284732566547214,
|
| 48981 |
+
"loss": 14.4821,
|
| 48982 |
+
"step": 69960
|
| 48983 |
+
},
|
| 48984 |
+
{
|
| 48985 |
+
"epoch": 0.10364758930846305,
|
| 48986 |
+
"grad_norm": 5.8125,
|
| 48987 |
+
"learning_rate": 0.00048284485619796833,
|
| 48988 |
+
"loss": 14.4635,
|
| 48989 |
+
"step": 69970
|
| 48990 |
+
},
|
| 48991 |
+
{
|
| 48992 |
+
"epoch": 0.10366240245542724,
|
| 48993 |
+
"grad_norm": 6.5625,
|
| 48994 |
+
"learning_rate": 0.00048284238673046453,
|
| 48995 |
+
"loss": 14.3532,
|
| 48996 |
+
"step": 69980
|
| 48997 |
+
},
|
| 48998 |
+
{
|
| 48999 |
+
"epoch": 0.10367721560239143,
|
| 49000 |
+
"grad_norm": 6.09375,
|
| 49001 |
+
"learning_rate": 0.0004828399172629608,
|
| 49002 |
+
"loss": 14.5118,
|
| 49003 |
+
"step": 69990
|
| 49004 |
+
},
|
| 49005 |
+
{
|
| 49006 |
+
"epoch": 0.10369202874935562,
|
| 49007 |
+
"grad_norm": 6.28125,
|
| 49008 |
+
"learning_rate": 0.000482837447795457,
|
| 49009 |
+
"loss": 14.4856,
|
| 49010 |
+
"step": 70000
|
| 49011 |
+
},
|
| 49012 |
+
{
|
| 49013 |
+
"epoch": 0.10370684189631982,
|
| 49014 |
+
"grad_norm": 6.3125,
|
| 49015 |
+
"learning_rate": 0.0004828349783279532,
|
| 49016 |
+
"loss": 14.4556,
|
| 49017 |
+
"step": 70010
|
| 49018 |
+
},
|
| 49019 |
+
{
|
| 49020 |
+
"epoch": 0.10372165504328401,
|
| 49021 |
+
"grad_norm": 9.0625,
|
| 49022 |
+
"learning_rate": 0.0004828325088604494,
|
| 49023 |
+
"loss": 14.4778,
|
| 49024 |
+
"step": 70020
|
| 49025 |
+
},
|
| 49026 |
+
{
|
| 49027 |
+
"epoch": 0.1037364681902482,
|
| 49028 |
+
"grad_norm": 9.6875,
|
| 49029 |
+
"learning_rate": 0.0004828300393929456,
|
| 49030 |
+
"loss": 14.3201,
|
| 49031 |
+
"step": 70030
|
| 49032 |
+
},
|
| 49033 |
+
{
|
| 49034 |
+
"epoch": 0.1037512813372124,
|
| 49035 |
+
"grad_norm": 6.34375,
|
| 49036 |
+
"learning_rate": 0.00048282756992544187,
|
| 49037 |
+
"loss": 14.4223,
|
| 49038 |
+
"step": 70040
|
| 49039 |
+
},
|
| 49040 |
+
{
|
| 49041 |
+
"epoch": 0.10376609448417659,
|
| 49042 |
+
"grad_norm": 6.625,
|
| 49043 |
+
"learning_rate": 0.000482825100457938,
|
| 49044 |
+
"loss": 14.4518,
|
| 49045 |
+
"step": 70050
|
| 49046 |
+
},
|
| 49047 |
+
{
|
| 49048 |
+
"epoch": 0.10378090763114078,
|
| 49049 |
+
"grad_norm": 7.21875,
|
| 49050 |
+
"learning_rate": 0.00048282263099043427,
|
| 49051 |
+
"loss": 14.4071,
|
| 49052 |
+
"step": 70060
|
| 49053 |
+
},
|
| 49054 |
+
{
|
| 49055 |
+
"epoch": 0.10379572077810498,
|
| 49056 |
+
"grad_norm": 5.8125,
|
| 49057 |
+
"learning_rate": 0.0004828201615229305,
|
| 49058 |
+
"loss": 14.4866,
|
| 49059 |
+
"step": 70070
|
| 49060 |
+
},
|
| 49061 |
+
{
|
| 49062 |
+
"epoch": 0.10381053392506917,
|
| 49063 |
+
"grad_norm": 34.5,
|
| 49064 |
+
"learning_rate": 0.0004828176920554267,
|
| 49065 |
+
"loss": 14.4047,
|
| 49066 |
+
"step": 70080
|
| 49067 |
+
},
|
| 49068 |
+
{
|
| 49069 |
+
"epoch": 0.10382534707203338,
|
| 49070 |
+
"grad_norm": 6.125,
|
| 49071 |
+
"learning_rate": 0.0004828152225879229,
|
| 49072 |
+
"loss": 14.4204,
|
| 49073 |
+
"step": 70090
|
| 49074 |
+
},
|
| 49075 |
+
{
|
| 49076 |
+
"epoch": 0.10384016021899757,
|
| 49077 |
+
"grad_norm": 5.8125,
|
| 49078 |
+
"learning_rate": 0.00048281275312041916,
|
| 49079 |
+
"loss": 14.4449,
|
| 49080 |
+
"step": 70100
|
| 49081 |
+
},
|
| 49082 |
+
{
|
| 49083 |
+
"epoch": 0.10385497336596176,
|
| 49084 |
+
"grad_norm": 6.125,
|
| 49085 |
+
"learning_rate": 0.00048281028365291536,
|
| 49086 |
+
"loss": 14.4551,
|
| 49087 |
+
"step": 70110
|
| 49088 |
+
},
|
| 49089 |
+
{
|
| 49090 |
+
"epoch": 0.10386978651292596,
|
| 49091 |
+
"grad_norm": 6.34375,
|
| 49092 |
+
"learning_rate": 0.0004828078141854116,
|
| 49093 |
+
"loss": 14.3592,
|
| 49094 |
+
"step": 70120
|
| 49095 |
+
},
|
| 49096 |
+
{
|
| 49097 |
+
"epoch": 0.10388459965989015,
|
| 49098 |
+
"grad_norm": 5.75,
|
| 49099 |
+
"learning_rate": 0.0004828053447179078,
|
| 49100 |
+
"loss": 14.415,
|
| 49101 |
+
"step": 70130
|
| 49102 |
+
},
|
| 49103 |
+
{
|
| 49104 |
+
"epoch": 0.10389941280685434,
|
| 49105 |
+
"grad_norm": 6.90625,
|
| 49106 |
+
"learning_rate": 0.000482802875250404,
|
| 49107 |
+
"loss": 14.4047,
|
| 49108 |
+
"step": 70140
|
| 49109 |
+
},
|
| 49110 |
+
{
|
| 49111 |
+
"epoch": 0.10391422595381854,
|
| 49112 |
+
"grad_norm": 6.8125,
|
| 49113 |
+
"learning_rate": 0.00048280040578290025,
|
| 49114 |
+
"loss": 14.4879,
|
| 49115 |
+
"step": 70150
|
| 49116 |
+
},
|
| 49117 |
+
{
|
| 49118 |
+
"epoch": 0.10392903910078273,
|
| 49119 |
+
"grad_norm": 6.03125,
|
| 49120 |
+
"learning_rate": 0.00048279793631539645,
|
| 49121 |
+
"loss": 14.3857,
|
| 49122 |
+
"step": 70160
|
| 49123 |
+
},
|
| 49124 |
+
{
|
| 49125 |
+
"epoch": 0.10394385224774692,
|
| 49126 |
+
"grad_norm": 6.375,
|
| 49127 |
+
"learning_rate": 0.00048279546684789265,
|
| 49128 |
+
"loss": 14.4641,
|
| 49129 |
+
"step": 70170
|
| 49130 |
+
},
|
| 49131 |
+
{
|
| 49132 |
+
"epoch": 0.10395866539471112,
|
| 49133 |
+
"grad_norm": 6.15625,
|
| 49134 |
+
"learning_rate": 0.0004827929973803889,
|
| 49135 |
+
"loss": 14.4991,
|
| 49136 |
+
"step": 70180
|
| 49137 |
+
},
|
| 49138 |
+
{
|
| 49139 |
+
"epoch": 0.10397347854167531,
|
| 49140 |
+
"grad_norm": 6.3125,
|
| 49141 |
+
"learning_rate": 0.0004827905279128851,
|
| 49142 |
+
"loss": 14.4239,
|
| 49143 |
+
"step": 70190
|
| 49144 |
+
},
|
| 49145 |
+
{
|
| 49146 |
+
"epoch": 0.1039882916886395,
|
| 49147 |
+
"grad_norm": 6.15625,
|
| 49148 |
+
"learning_rate": 0.0004827880584453813,
|
| 49149 |
+
"loss": 14.4476,
|
| 49150 |
+
"step": 70200
|
| 49151 |
+
},
|
| 49152 |
+
{
|
| 49153 |
+
"epoch": 0.1040031048356037,
|
| 49154 |
+
"grad_norm": 7.875,
|
| 49155 |
+
"learning_rate": 0.00048278558897787754,
|
| 49156 |
+
"loss": 14.4211,
|
| 49157 |
+
"step": 70210
|
| 49158 |
+
},
|
| 49159 |
+
{
|
| 49160 |
+
"epoch": 0.10401791798256789,
|
| 49161 |
+
"grad_norm": 6.59375,
|
| 49162 |
+
"learning_rate": 0.00048278311951037374,
|
| 49163 |
+
"loss": 14.5706,
|
| 49164 |
+
"step": 70220
|
| 49165 |
+
},
|
| 49166 |
+
{
|
| 49167 |
+
"epoch": 0.10403273112953208,
|
| 49168 |
+
"grad_norm": 6.53125,
|
| 49169 |
+
"learning_rate": 0.00048278065004287,
|
| 49170 |
+
"loss": 14.4688,
|
| 49171 |
+
"step": 70230
|
| 49172 |
+
},
|
| 49173 |
+
{
|
| 49174 |
+
"epoch": 0.10404754427649628,
|
| 49175 |
+
"grad_norm": 5.84375,
|
| 49176 |
+
"learning_rate": 0.0004827781805753662,
|
| 49177 |
+
"loss": 14.436,
|
| 49178 |
+
"step": 70240
|
| 49179 |
+
},
|
| 49180 |
+
{
|
| 49181 |
+
"epoch": 0.10406235742346047,
|
| 49182 |
+
"grad_norm": 6.5,
|
| 49183 |
+
"learning_rate": 0.0004827757111078624,
|
| 49184 |
+
"loss": 14.4672,
|
| 49185 |
+
"step": 70250
|
| 49186 |
+
},
|
| 49187 |
+
{
|
| 49188 |
+
"epoch": 0.10407717057042466,
|
| 49189 |
+
"grad_norm": 5.625,
|
| 49190 |
+
"learning_rate": 0.00048277324164035864,
|
| 49191 |
+
"loss": 14.4097,
|
| 49192 |
+
"step": 70260
|
| 49193 |
+
},
|
| 49194 |
+
{
|
| 49195 |
+
"epoch": 0.10409198371738886,
|
| 49196 |
+
"grad_norm": 6.8125,
|
| 49197 |
+
"learning_rate": 0.00048277077217285483,
|
| 49198 |
+
"loss": 14.4019,
|
| 49199 |
+
"step": 70270
|
| 49200 |
+
},
|
| 49201 |
+
{
|
| 49202 |
+
"epoch": 0.10410679686435305,
|
| 49203 |
+
"grad_norm": 6.0625,
|
| 49204 |
+
"learning_rate": 0.00048276830270535103,
|
| 49205 |
+
"loss": 14.4889,
|
| 49206 |
+
"step": 70280
|
| 49207 |
+
},
|
| 49208 |
+
{
|
| 49209 |
+
"epoch": 0.10412161001131724,
|
| 49210 |
+
"grad_norm": 7.0,
|
| 49211 |
+
"learning_rate": 0.0004827658332378473,
|
| 49212 |
+
"loss": 14.4825,
|
| 49213 |
+
"step": 70290
|
| 49214 |
+
},
|
| 49215 |
+
{
|
| 49216 |
+
"epoch": 0.10413642315828144,
|
| 49217 |
+
"grad_norm": 6.875,
|
| 49218 |
+
"learning_rate": 0.0004827633637703435,
|
| 49219 |
+
"loss": 14.3975,
|
| 49220 |
+
"step": 70300
|
| 49221 |
+
},
|
| 49222 |
+
{
|
| 49223 |
+
"epoch": 0.10415123630524563,
|
| 49224 |
+
"grad_norm": 6.9375,
|
| 49225 |
+
"learning_rate": 0.00048276089430283973,
|
| 49226 |
+
"loss": 14.4059,
|
| 49227 |
+
"step": 70310
|
| 49228 |
+
},
|
| 49229 |
+
{
|
| 49230 |
+
"epoch": 0.10416604945220982,
|
| 49231 |
+
"grad_norm": 5.71875,
|
| 49232 |
+
"learning_rate": 0.0004827584248353359,
|
| 49233 |
+
"loss": 14.384,
|
| 49234 |
+
"step": 70320
|
| 49235 |
+
},
|
| 49236 |
+
{
|
| 49237 |
+
"epoch": 0.10418086259917402,
|
| 49238 |
+
"grad_norm": 6.1875,
|
| 49239 |
+
"learning_rate": 0.0004827559553678321,
|
| 49240 |
+
"loss": 14.5002,
|
| 49241 |
+
"step": 70330
|
| 49242 |
+
},
|
| 49243 |
+
{
|
| 49244 |
+
"epoch": 0.10419567574613821,
|
| 49245 |
+
"grad_norm": 5.6875,
|
| 49246 |
+
"learning_rate": 0.0004827534859003284,
|
| 49247 |
+
"loss": 14.3884,
|
| 49248 |
+
"step": 70340
|
| 49249 |
+
},
|
| 49250 |
+
{
|
| 49251 |
+
"epoch": 0.1042104888931024,
|
| 49252 |
+
"grad_norm": 14.0,
|
| 49253 |
+
"learning_rate": 0.0004827510164328245,
|
| 49254 |
+
"loss": 14.312,
|
| 49255 |
+
"step": 70350
|
| 49256 |
+
},
|
| 49257 |
+
{
|
| 49258 |
+
"epoch": 0.1042253020400666,
|
| 49259 |
+
"grad_norm": 6.5625,
|
| 49260 |
+
"learning_rate": 0.00048274854696532077,
|
| 49261 |
+
"loss": 14.4013,
|
| 49262 |
+
"step": 70360
|
| 49263 |
+
},
|
| 49264 |
+
{
|
| 49265 |
+
"epoch": 0.10424011518703079,
|
| 49266 |
+
"grad_norm": 6.75,
|
| 49267 |
+
"learning_rate": 0.000482746077497817,
|
| 49268 |
+
"loss": 14.4415,
|
| 49269 |
+
"step": 70370
|
| 49270 |
+
},
|
| 49271 |
+
{
|
| 49272 |
+
"epoch": 0.10425492833399498,
|
| 49273 |
+
"grad_norm": 5.90625,
|
| 49274 |
+
"learning_rate": 0.0004827436080303132,
|
| 49275 |
+
"loss": 14.4485,
|
| 49276 |
+
"step": 70380
|
| 49277 |
+
},
|
| 49278 |
+
{
|
| 49279 |
+
"epoch": 0.10426974148095917,
|
| 49280 |
+
"grad_norm": 6.21875,
|
| 49281 |
+
"learning_rate": 0.0004827411385628094,
|
| 49282 |
+
"loss": 14.3851,
|
| 49283 |
+
"step": 70390
|
| 49284 |
+
},
|
| 49285 |
+
{
|
| 49286 |
+
"epoch": 0.10428455462792338,
|
| 49287 |
+
"grad_norm": 6.40625,
|
| 49288 |
+
"learning_rate": 0.00048273866909530566,
|
| 49289 |
+
"loss": 14.466,
|
| 49290 |
+
"step": 70400
|
| 49291 |
+
},
|
| 49292 |
+
{
|
| 49293 |
+
"epoch": 0.10429936777488757,
|
| 49294 |
+
"grad_norm": 6.5625,
|
| 49295 |
+
"learning_rate": 0.00048273619962780186,
|
| 49296 |
+
"loss": 14.5103,
|
| 49297 |
+
"step": 70410
|
| 49298 |
+
},
|
| 49299 |
+
{
|
| 49300 |
+
"epoch": 0.10431418092185177,
|
| 49301 |
+
"grad_norm": 6.1875,
|
| 49302 |
+
"learning_rate": 0.0004827337301602981,
|
| 49303 |
+
"loss": 14.444,
|
| 49304 |
+
"step": 70420
|
| 49305 |
+
},
|
| 49306 |
+
{
|
| 49307 |
+
"epoch": 0.10432899406881596,
|
| 49308 |
+
"grad_norm": 7.78125,
|
| 49309 |
+
"learning_rate": 0.0004827312606927943,
|
| 49310 |
+
"loss": 14.4263,
|
| 49311 |
+
"step": 70430
|
| 49312 |
+
},
|
| 49313 |
+
{
|
| 49314 |
+
"epoch": 0.10434380721578015,
|
| 49315 |
+
"grad_norm": 7.65625,
|
| 49316 |
+
"learning_rate": 0.0004827287912252905,
|
| 49317 |
+
"loss": 14.412,
|
| 49318 |
+
"step": 70440
|
| 49319 |
+
},
|
| 49320 |
+
{
|
| 49321 |
+
"epoch": 0.10435862036274435,
|
| 49322 |
+
"grad_norm": 5.53125,
|
| 49323 |
+
"learning_rate": 0.00048272632175778675,
|
| 49324 |
+
"loss": 14.4303,
|
| 49325 |
+
"step": 70450
|
| 49326 |
+
},
|
| 49327 |
+
{
|
| 49328 |
+
"epoch": 0.10437343350970854,
|
| 49329 |
+
"grad_norm": 6.9375,
|
| 49330 |
+
"learning_rate": 0.00048272385229028295,
|
| 49331 |
+
"loss": 14.4522,
|
| 49332 |
+
"step": 70460
|
| 49333 |
+
},
|
| 49334 |
+
{
|
| 49335 |
+
"epoch": 0.10438824665667273,
|
| 49336 |
+
"grad_norm": 6.4375,
|
| 49337 |
+
"learning_rate": 0.00048272138282277915,
|
| 49338 |
+
"loss": 14.3848,
|
| 49339 |
+
"step": 70470
|
| 49340 |
+
},
|
| 49341 |
+
{
|
| 49342 |
+
"epoch": 0.10440305980363693,
|
| 49343 |
+
"grad_norm": 6.8125,
|
| 49344 |
+
"learning_rate": 0.0004827189133552754,
|
| 49345 |
+
"loss": 14.4066,
|
| 49346 |
+
"step": 70480
|
| 49347 |
+
},
|
| 49348 |
+
{
|
| 49349 |
+
"epoch": 0.10441787295060112,
|
| 49350 |
+
"grad_norm": 6.21875,
|
| 49351 |
+
"learning_rate": 0.0004827164438877716,
|
| 49352 |
+
"loss": 14.4463,
|
| 49353 |
+
"step": 70490
|
| 49354 |
+
},
|
| 49355 |
+
{
|
| 49356 |
+
"epoch": 0.10443268609756531,
|
| 49357 |
+
"grad_norm": 7.84375,
|
| 49358 |
+
"learning_rate": 0.00048271397442026785,
|
| 49359 |
+
"loss": 14.3453,
|
| 49360 |
+
"step": 70500
|
| 49361 |
+
},
|
| 49362 |
+
{
|
| 49363 |
+
"epoch": 0.10444749924452951,
|
| 49364 |
+
"grad_norm": 6.1875,
|
| 49365 |
+
"learning_rate": 0.00048271150495276404,
|
| 49366 |
+
"loss": 14.5356,
|
| 49367 |
+
"step": 70510
|
| 49368 |
+
},
|
| 49369 |
+
{
|
| 49370 |
+
"epoch": 0.1044623123914937,
|
| 49371 |
+
"grad_norm": 6.4375,
|
| 49372 |
+
"learning_rate": 0.00048270903548526024,
|
| 49373 |
+
"loss": 14.4272,
|
| 49374 |
+
"step": 70520
|
| 49375 |
+
},
|
| 49376 |
+
{
|
| 49377 |
+
"epoch": 0.1044771255384579,
|
| 49378 |
+
"grad_norm": 7.09375,
|
| 49379 |
+
"learning_rate": 0.0004827065660177565,
|
| 49380 |
+
"loss": 14.4296,
|
| 49381 |
+
"step": 70530
|
| 49382 |
+
},
|
| 49383 |
+
{
|
| 49384 |
+
"epoch": 0.10449193868542209,
|
| 49385 |
+
"grad_norm": 6.34375,
|
| 49386 |
+
"learning_rate": 0.00048270409655025263,
|
| 49387 |
+
"loss": 14.3646,
|
| 49388 |
+
"step": 70540
|
| 49389 |
+
},
|
| 49390 |
+
{
|
| 49391 |
+
"epoch": 0.10450675183238628,
|
| 49392 |
+
"grad_norm": 5.3125,
|
| 49393 |
+
"learning_rate": 0.0004827016270827489,
|
| 49394 |
+
"loss": 14.4788,
|
| 49395 |
+
"step": 70550
|
| 49396 |
+
},
|
| 49397 |
+
{
|
| 49398 |
+
"epoch": 0.10452156497935047,
|
| 49399 |
+
"grad_norm": 6.84375,
|
| 49400 |
+
"learning_rate": 0.00048269915761524514,
|
| 49401 |
+
"loss": 14.3832,
|
| 49402 |
+
"step": 70560
|
| 49403 |
+
},
|
| 49404 |
+
{
|
| 49405 |
+
"epoch": 0.10453637812631467,
|
| 49406 |
+
"grad_norm": 7.125,
|
| 49407 |
+
"learning_rate": 0.00048269668814774133,
|
| 49408 |
+
"loss": 14.3226,
|
| 49409 |
+
"step": 70570
|
| 49410 |
+
},
|
| 49411 |
+
{
|
| 49412 |
+
"epoch": 0.10455119127327886,
|
| 49413 |
+
"grad_norm": 6.625,
|
| 49414 |
+
"learning_rate": 0.00048269421868023753,
|
| 49415 |
+
"loss": 14.4015,
|
| 49416 |
+
"step": 70580
|
| 49417 |
+
},
|
| 49418 |
+
{
|
| 49419 |
+
"epoch": 0.10456600442024305,
|
| 49420 |
+
"grad_norm": 6.4375,
|
| 49421 |
+
"learning_rate": 0.0004826917492127338,
|
| 49422 |
+
"loss": 14.3633,
|
| 49423 |
+
"step": 70590
|
| 49424 |
+
},
|
| 49425 |
+
{
|
| 49426 |
+
"epoch": 0.10458081756720725,
|
| 49427 |
+
"grad_norm": 6.9375,
|
| 49428 |
+
"learning_rate": 0.00048268927974523,
|
| 49429 |
+
"loss": 14.3725,
|
| 49430 |
+
"step": 70600
|
| 49431 |
+
},
|
| 49432 |
+
{
|
| 49433 |
+
"epoch": 0.10459563071417144,
|
| 49434 |
+
"grad_norm": 8.125,
|
| 49435 |
+
"learning_rate": 0.00048268681027772623,
|
| 49436 |
+
"loss": 14.3893,
|
| 49437 |
+
"step": 70610
|
| 49438 |
+
},
|
| 49439 |
+
{
|
| 49440 |
+
"epoch": 0.10461044386113563,
|
| 49441 |
+
"grad_norm": 7.59375,
|
| 49442 |
+
"learning_rate": 0.0004826843408102224,
|
| 49443 |
+
"loss": 14.42,
|
| 49444 |
+
"step": 70620
|
| 49445 |
+
},
|
| 49446 |
+
{
|
| 49447 |
+
"epoch": 0.10462525700809983,
|
| 49448 |
+
"grad_norm": 19.25,
|
| 49449 |
+
"learning_rate": 0.0004826818713427186,
|
| 49450 |
+
"loss": 14.4351,
|
| 49451 |
+
"step": 70630
|
| 49452 |
+
},
|
| 49453 |
+
{
|
| 49454 |
+
"epoch": 0.10464007015506402,
|
| 49455 |
+
"grad_norm": 7.09375,
|
| 49456 |
+
"learning_rate": 0.0004826794018752149,
|
| 49457 |
+
"loss": 14.3579,
|
| 49458 |
+
"step": 70640
|
| 49459 |
+
},
|
| 49460 |
+
{
|
| 49461 |
+
"epoch": 0.10465488330202821,
|
| 49462 |
+
"grad_norm": 6.0,
|
| 49463 |
+
"learning_rate": 0.00048267693240771107,
|
| 49464 |
+
"loss": 14.4061,
|
| 49465 |
+
"step": 70650
|
| 49466 |
+
},
|
| 49467 |
+
{
|
| 49468 |
+
"epoch": 0.1046696964489924,
|
| 49469 |
+
"grad_norm": 6.15625,
|
| 49470 |
+
"learning_rate": 0.00048267446294020727,
|
| 49471 |
+
"loss": 14.3703,
|
| 49472 |
+
"step": 70660
|
| 49473 |
+
},
|
| 49474 |
+
{
|
| 49475 |
+
"epoch": 0.1046845095959566,
|
| 49476 |
+
"grad_norm": 6.625,
|
| 49477 |
+
"learning_rate": 0.0004826719934727035,
|
| 49478 |
+
"loss": 14.4221,
|
| 49479 |
+
"step": 70670
|
| 49480 |
+
},
|
| 49481 |
+
{
|
| 49482 |
+
"epoch": 0.10469932274292079,
|
| 49483 |
+
"grad_norm": 6.21875,
|
| 49484 |
+
"learning_rate": 0.0004826695240051997,
|
| 49485 |
+
"loss": 14.4084,
|
| 49486 |
+
"step": 70680
|
| 49487 |
+
},
|
| 49488 |
+
{
|
| 49489 |
+
"epoch": 0.10471413588988498,
|
| 49490 |
+
"grad_norm": 5.90625,
|
| 49491 |
+
"learning_rate": 0.0004826670545376959,
|
| 49492 |
+
"loss": 14.3775,
|
| 49493 |
+
"step": 70690
|
| 49494 |
+
},
|
| 49495 |
+
{
|
| 49496 |
+
"epoch": 0.10472894903684918,
|
| 49497 |
+
"grad_norm": 6.0625,
|
| 49498 |
+
"learning_rate": 0.00048266458507019216,
|
| 49499 |
+
"loss": 14.4421,
|
| 49500 |
+
"step": 70700
|
| 49501 |
+
},
|
| 49502 |
+
{
|
| 49503 |
+
"epoch": 0.10474376218381337,
|
| 49504 |
+
"grad_norm": 6.3125,
|
| 49505 |
+
"learning_rate": 0.00048266211560268836,
|
| 49506 |
+
"loss": 14.4759,
|
| 49507 |
+
"step": 70710
|
| 49508 |
+
},
|
| 49509 |
+
{
|
| 49510 |
+
"epoch": 0.10475857533077758,
|
| 49511 |
+
"grad_norm": 6.90625,
|
| 49512 |
+
"learning_rate": 0.0004826596461351846,
|
| 49513 |
+
"loss": 14.4147,
|
| 49514 |
+
"step": 70720
|
| 49515 |
+
},
|
| 49516 |
+
{
|
| 49517 |
+
"epoch": 0.10477338847774177,
|
| 49518 |
+
"grad_norm": 5.71875,
|
| 49519 |
+
"learning_rate": 0.0004826571766676808,
|
| 49520 |
+
"loss": 14.4619,
|
| 49521 |
+
"step": 70730
|
| 49522 |
+
},
|
| 49523 |
+
{
|
| 49524 |
+
"epoch": 0.10478820162470596,
|
| 49525 |
+
"grad_norm": 9.0,
|
| 49526 |
+
"learning_rate": 0.000482654707200177,
|
| 49527 |
+
"loss": 14.4316,
|
| 49528 |
+
"step": 70740
|
| 49529 |
+
},
|
| 49530 |
+
{
|
| 49531 |
+
"epoch": 0.10480301477167016,
|
| 49532 |
+
"grad_norm": 6.375,
|
| 49533 |
+
"learning_rate": 0.00048265223773267325,
|
| 49534 |
+
"loss": 14.5072,
|
| 49535 |
+
"step": 70750
|
| 49536 |
+
},
|
| 49537 |
+
{
|
| 49538 |
+
"epoch": 0.10481782791863435,
|
| 49539 |
+
"grad_norm": 6.4375,
|
| 49540 |
+
"learning_rate": 0.00048264976826516945,
|
| 49541 |
+
"loss": 14.4131,
|
| 49542 |
+
"step": 70760
|
| 49543 |
+
},
|
| 49544 |
+
{
|
| 49545 |
+
"epoch": 0.10483264106559854,
|
| 49546 |
+
"grad_norm": 6.375,
|
| 49547 |
+
"learning_rate": 0.00048264729879766565,
|
| 49548 |
+
"loss": 14.5537,
|
| 49549 |
+
"step": 70770
|
| 49550 |
+
},
|
| 49551 |
+
{
|
| 49552 |
+
"epoch": 0.10484745421256274,
|
| 49553 |
+
"grad_norm": 6.21875,
|
| 49554 |
+
"learning_rate": 0.0004826448293301619,
|
| 49555 |
+
"loss": 14.3948,
|
| 49556 |
+
"step": 70780
|
| 49557 |
+
},
|
| 49558 |
+
{
|
| 49559 |
+
"epoch": 0.10486226735952693,
|
| 49560 |
+
"grad_norm": 6.34375,
|
| 49561 |
+
"learning_rate": 0.0004826423598626581,
|
| 49562 |
+
"loss": 14.3598,
|
| 49563 |
+
"step": 70790
|
| 49564 |
+
},
|
| 49565 |
+
{
|
| 49566 |
+
"epoch": 0.10487708050649112,
|
| 49567 |
+
"grad_norm": 6.1875,
|
| 49568 |
+
"learning_rate": 0.00048263989039515435,
|
| 49569 |
+
"loss": 14.4712,
|
| 49570 |
+
"step": 70800
|
| 49571 |
+
},
|
| 49572 |
+
{
|
| 49573 |
+
"epoch": 0.10489189365345532,
|
| 49574 |
+
"grad_norm": 6.78125,
|
| 49575 |
+
"learning_rate": 0.00048263742092765054,
|
| 49576 |
+
"loss": 14.3404,
|
| 49577 |
+
"step": 70810
|
| 49578 |
+
},
|
| 49579 |
+
{
|
| 49580 |
+
"epoch": 0.10490670680041951,
|
| 49581 |
+
"grad_norm": 7.75,
|
| 49582 |
+
"learning_rate": 0.00048263495146014674,
|
| 49583 |
+
"loss": 14.4899,
|
| 49584 |
+
"step": 70820
|
| 49585 |
+
},
|
| 49586 |
+
{
|
| 49587 |
+
"epoch": 0.1049215199473837,
|
| 49588 |
+
"grad_norm": 6.5,
|
| 49589 |
+
"learning_rate": 0.000482632481992643,
|
| 49590 |
+
"loss": 14.4301,
|
| 49591 |
+
"step": 70830
|
| 49592 |
+
},
|
| 49593 |
+
{
|
| 49594 |
+
"epoch": 0.1049363330943479,
|
| 49595 |
+
"grad_norm": 6.75,
|
| 49596 |
+
"learning_rate": 0.00048263001252513913,
|
| 49597 |
+
"loss": 14.4358,
|
| 49598 |
+
"step": 70840
|
| 49599 |
+
},
|
| 49600 |
+
{
|
| 49601 |
+
"epoch": 0.10495114624131209,
|
| 49602 |
+
"grad_norm": 6.3125,
|
| 49603 |
+
"learning_rate": 0.0004826275430576354,
|
| 49604 |
+
"loss": 14.4049,
|
| 49605 |
+
"step": 70850
|
| 49606 |
+
},
|
| 49607 |
+
{
|
| 49608 |
+
"epoch": 0.10496595938827628,
|
| 49609 |
+
"grad_norm": 6.71875,
|
| 49610 |
+
"learning_rate": 0.00048262507359013164,
|
| 49611 |
+
"loss": 14.3964,
|
| 49612 |
+
"step": 70860
|
| 49613 |
+
},
|
| 49614 |
+
{
|
| 49615 |
+
"epoch": 0.10498077253524048,
|
| 49616 |
+
"grad_norm": 6.3125,
|
| 49617 |
+
"learning_rate": 0.00048262260412262783,
|
| 49618 |
+
"loss": 14.3938,
|
| 49619 |
+
"step": 70870
|
| 49620 |
+
},
|
| 49621 |
+
{
|
| 49622 |
+
"epoch": 0.10499558568220467,
|
| 49623 |
+
"grad_norm": 8.0625,
|
| 49624 |
+
"learning_rate": 0.00048262013465512403,
|
| 49625 |
+
"loss": 14.4033,
|
| 49626 |
+
"step": 70880
|
| 49627 |
+
},
|
| 49628 |
+
{
|
| 49629 |
+
"epoch": 0.10501039882916886,
|
| 49630 |
+
"grad_norm": 7.5625,
|
| 49631 |
+
"learning_rate": 0.0004826176651876203,
|
| 49632 |
+
"loss": 14.459,
|
| 49633 |
+
"step": 70890
|
| 49634 |
+
},
|
| 49635 |
+
{
|
| 49636 |
+
"epoch": 0.10502521197613306,
|
| 49637 |
+
"grad_norm": 6.5,
|
| 49638 |
+
"learning_rate": 0.0004826151957201165,
|
| 49639 |
+
"loss": 14.3915,
|
| 49640 |
+
"step": 70900
|
| 49641 |
+
},
|
| 49642 |
+
{
|
| 49643 |
+
"epoch": 0.10504002512309725,
|
| 49644 |
+
"grad_norm": 6.3125,
|
| 49645 |
+
"learning_rate": 0.00048261272625261273,
|
| 49646 |
+
"loss": 14.3697,
|
| 49647 |
+
"step": 70910
|
| 49648 |
+
},
|
| 49649 |
+
{
|
| 49650 |
+
"epoch": 0.10505483827006144,
|
| 49651 |
+
"grad_norm": 5.90625,
|
| 49652 |
+
"learning_rate": 0.0004826102567851089,
|
| 49653 |
+
"loss": 14.3685,
|
| 49654 |
+
"step": 70920
|
| 49655 |
+
},
|
| 49656 |
+
{
|
| 49657 |
+
"epoch": 0.10506965141702564,
|
| 49658 |
+
"grad_norm": 7.53125,
|
| 49659 |
+
"learning_rate": 0.0004826077873176051,
|
| 49660 |
+
"loss": 14.4587,
|
| 49661 |
+
"step": 70930
|
| 49662 |
+
},
|
| 49663 |
+
{
|
| 49664 |
+
"epoch": 0.10508446456398983,
|
| 49665 |
+
"grad_norm": 6.5625,
|
| 49666 |
+
"learning_rate": 0.0004826053178501014,
|
| 49667 |
+
"loss": 14.3994,
|
| 49668 |
+
"step": 70940
|
| 49669 |
+
},
|
| 49670 |
+
{
|
| 49671 |
+
"epoch": 0.10509927771095402,
|
| 49672 |
+
"grad_norm": 7.25,
|
| 49673 |
+
"learning_rate": 0.00048260284838259757,
|
| 49674 |
+
"loss": 14.4636,
|
| 49675 |
+
"step": 70950
|
| 49676 |
+
},
|
| 49677 |
+
{
|
| 49678 |
+
"epoch": 0.10511409085791822,
|
| 49679 |
+
"grad_norm": 5.8125,
|
| 49680 |
+
"learning_rate": 0.00048260037891509377,
|
| 49681 |
+
"loss": 14.4377,
|
| 49682 |
+
"step": 70960
|
| 49683 |
+
},
|
| 49684 |
+
{
|
| 49685 |
+
"epoch": 0.10512890400488241,
|
| 49686 |
+
"grad_norm": 7.09375,
|
| 49687 |
+
"learning_rate": 0.00048259790944759,
|
| 49688 |
+
"loss": 14.4444,
|
| 49689 |
+
"step": 70970
|
| 49690 |
+
},
|
| 49691 |
+
{
|
| 49692 |
+
"epoch": 0.1051437171518466,
|
| 49693 |
+
"grad_norm": 6.125,
|
| 49694 |
+
"learning_rate": 0.0004825954399800862,
|
| 49695 |
+
"loss": 14.4666,
|
| 49696 |
+
"step": 70980
|
| 49697 |
+
},
|
| 49698 |
+
{
|
| 49699 |
+
"epoch": 0.1051585302988108,
|
| 49700 |
+
"grad_norm": 5.84375,
|
| 49701 |
+
"learning_rate": 0.00048259297051258247,
|
| 49702 |
+
"loss": 14.4708,
|
| 49703 |
+
"step": 70990
|
| 49704 |
+
},
|
| 49705 |
+
{
|
| 49706 |
+
"epoch": 0.10517334344577499,
|
| 49707 |
+
"grad_norm": 6.375,
|
| 49708 |
+
"learning_rate": 0.00048259050104507866,
|
| 49709 |
+
"loss": 14.4481,
|
| 49710 |
+
"step": 71000
|
| 49711 |
+
},
|
| 49712 |
+
{
|
| 49713 |
+
"epoch": 0.10518815659273918,
|
| 49714 |
+
"grad_norm": 5.96875,
|
| 49715 |
+
"learning_rate": 0.00048258803157757486,
|
| 49716 |
+
"loss": 14.3829,
|
| 49717 |
+
"step": 71010
|
| 49718 |
+
},
|
| 49719 |
+
{
|
| 49720 |
+
"epoch": 0.10520296973970338,
|
| 49721 |
+
"grad_norm": 6.84375,
|
| 49722 |
+
"learning_rate": 0.0004825855621100711,
|
| 49723 |
+
"loss": 14.3818,
|
| 49724 |
+
"step": 71020
|
| 49725 |
+
},
|
| 49726 |
+
{
|
| 49727 |
+
"epoch": 0.10521778288666757,
|
| 49728 |
+
"grad_norm": 6.4375,
|
| 49729 |
+
"learning_rate": 0.0004825830926425673,
|
| 49730 |
+
"loss": 14.3698,
|
| 49731 |
+
"step": 71030
|
| 49732 |
+
},
|
| 49733 |
+
{
|
| 49734 |
+
"epoch": 0.10523259603363178,
|
| 49735 |
+
"grad_norm": 6.34375,
|
| 49736 |
+
"learning_rate": 0.0004825806231750635,
|
| 49737 |
+
"loss": 14.3767,
|
| 49738 |
+
"step": 71040
|
| 49739 |
+
},
|
| 49740 |
+
{
|
| 49741 |
+
"epoch": 0.10524740918059597,
|
| 49742 |
+
"grad_norm": 7.53125,
|
| 49743 |
+
"learning_rate": 0.00048257815370755976,
|
| 49744 |
+
"loss": 14.4468,
|
| 49745 |
+
"step": 71050
|
| 49746 |
+
},
|
| 49747 |
+
{
|
| 49748 |
+
"epoch": 0.10526222232756016,
|
| 49749 |
+
"grad_norm": 7.625,
|
| 49750 |
+
"learning_rate": 0.00048257568424005595,
|
| 49751 |
+
"loss": 14.4673,
|
| 49752 |
+
"step": 71060
|
| 49753 |
+
},
|
| 49754 |
+
{
|
| 49755 |
+
"epoch": 0.10527703547452436,
|
| 49756 |
+
"grad_norm": 6.6875,
|
| 49757 |
+
"learning_rate": 0.00048257321477255215,
|
| 49758 |
+
"loss": 14.4455,
|
| 49759 |
+
"step": 71070
|
| 49760 |
+
},
|
| 49761 |
+
{
|
| 49762 |
+
"epoch": 0.10529184862148855,
|
| 49763 |
+
"grad_norm": 6.5625,
|
| 49764 |
+
"learning_rate": 0.0004825707453050484,
|
| 49765 |
+
"loss": 14.4717,
|
| 49766 |
+
"step": 71080
|
| 49767 |
+
},
|
| 49768 |
+
{
|
| 49769 |
+
"epoch": 0.10530666176845274,
|
| 49770 |
+
"grad_norm": 6.46875,
|
| 49771 |
+
"learning_rate": 0.0004825682758375446,
|
| 49772 |
+
"loss": 14.3866,
|
| 49773 |
+
"step": 71090
|
| 49774 |
+
},
|
| 49775 |
+
{
|
| 49776 |
+
"epoch": 0.10532147491541693,
|
| 49777 |
+
"grad_norm": 6.65625,
|
| 49778 |
+
"learning_rate": 0.00048256580637004085,
|
| 49779 |
+
"loss": 14.398,
|
| 49780 |
+
"step": 71100
|
| 49781 |
+
},
|
| 49782 |
+
{
|
| 49783 |
+
"epoch": 0.10533628806238113,
|
| 49784 |
+
"grad_norm": 9.1875,
|
| 49785 |
+
"learning_rate": 0.00048256333690253704,
|
| 49786 |
+
"loss": 14.3839,
|
| 49787 |
+
"step": 71110
|
| 49788 |
+
},
|
| 49789 |
+
{
|
| 49790 |
+
"epoch": 0.10535110120934532,
|
| 49791 |
+
"grad_norm": 5.84375,
|
| 49792 |
+
"learning_rate": 0.00048256086743503324,
|
| 49793 |
+
"loss": 14.4169,
|
| 49794 |
+
"step": 71120
|
| 49795 |
+
},
|
| 49796 |
+
{
|
| 49797 |
+
"epoch": 0.10536591435630951,
|
| 49798 |
+
"grad_norm": 11.9375,
|
| 49799 |
+
"learning_rate": 0.0004825583979675295,
|
| 49800 |
+
"loss": 14.3776,
|
| 49801 |
+
"step": 71130
|
| 49802 |
+
},
|
| 49803 |
+
{
|
| 49804 |
+
"epoch": 0.10538072750327371,
|
| 49805 |
+
"grad_norm": 7.3125,
|
| 49806 |
+
"learning_rate": 0.0004825559285000257,
|
| 49807 |
+
"loss": 14.4793,
|
| 49808 |
+
"step": 71140
|
| 49809 |
+
},
|
| 49810 |
+
{
|
| 49811 |
+
"epoch": 0.1053955406502379,
|
| 49812 |
+
"grad_norm": 5.96875,
|
| 49813 |
+
"learning_rate": 0.0004825534590325219,
|
| 49814 |
+
"loss": 14.4644,
|
| 49815 |
+
"step": 71150
|
| 49816 |
+
},
|
| 49817 |
+
{
|
| 49818 |
+
"epoch": 0.1054103537972021,
|
| 49819 |
+
"grad_norm": 7.375,
|
| 49820 |
+
"learning_rate": 0.00048255098956501814,
|
| 49821 |
+
"loss": 14.299,
|
| 49822 |
+
"step": 71160
|
| 49823 |
+
},
|
| 49824 |
+
{
|
| 49825 |
+
"epoch": 0.10542516694416629,
|
| 49826 |
+
"grad_norm": 328.0,
|
| 49827 |
+
"learning_rate": 0.00048254852009751433,
|
| 49828 |
+
"loss": 14.4123,
|
| 49829 |
+
"step": 71170
|
| 49830 |
+
},
|
| 49831 |
+
{
|
| 49832 |
+
"epoch": 0.10543998009113048,
|
| 49833 |
+
"grad_norm": 5.8125,
|
| 49834 |
+
"learning_rate": 0.0004825460506300106,
|
| 49835 |
+
"loss": 14.3732,
|
| 49836 |
+
"step": 71180
|
| 49837 |
+
},
|
| 49838 |
+
{
|
| 49839 |
+
"epoch": 0.10545479323809467,
|
| 49840 |
+
"grad_norm": 6.40625,
|
| 49841 |
+
"learning_rate": 0.0004825435811625068,
|
| 49842 |
+
"loss": 14.2835,
|
| 49843 |
+
"step": 71190
|
| 49844 |
+
},
|
| 49845 |
+
{
|
| 49846 |
+
"epoch": 0.10546960638505887,
|
| 49847 |
+
"grad_norm": 6.875,
|
| 49848 |
+
"learning_rate": 0.000482541111695003,
|
| 49849 |
+
"loss": 14.3962,
|
| 49850 |
+
"step": 71200
|
| 49851 |
+
},
|
| 49852 |
+
{
|
| 49853 |
+
"epoch": 0.10548441953202306,
|
| 49854 |
+
"grad_norm": 6.53125,
|
| 49855 |
+
"learning_rate": 0.00048253864222749923,
|
| 49856 |
+
"loss": 14.4418,
|
| 49857 |
+
"step": 71210
|
| 49858 |
+
},
|
| 49859 |
+
{
|
| 49860 |
+
"epoch": 0.10549923267898725,
|
| 49861 |
+
"grad_norm": 6.96875,
|
| 49862 |
+
"learning_rate": 0.0004825361727599954,
|
| 49863 |
+
"loss": 14.4826,
|
| 49864 |
+
"step": 71220
|
| 49865 |
+
},
|
| 49866 |
+
{
|
| 49867 |
+
"epoch": 0.10551404582595145,
|
| 49868 |
+
"grad_norm": 6.5625,
|
| 49869 |
+
"learning_rate": 0.0004825337032924916,
|
| 49870 |
+
"loss": 14.4345,
|
| 49871 |
+
"step": 71230
|
| 49872 |
+
},
|
| 49873 |
+
{
|
| 49874 |
+
"epoch": 0.10552885897291564,
|
| 49875 |
+
"grad_norm": 5.71875,
|
| 49876 |
+
"learning_rate": 0.0004825312338249879,
|
| 49877 |
+
"loss": 14.41,
|
| 49878 |
+
"step": 71240
|
| 49879 |
+
},
|
| 49880 |
+
{
|
| 49881 |
+
"epoch": 0.10554367211987983,
|
| 49882 |
+
"grad_norm": 6.59375,
|
| 49883 |
+
"learning_rate": 0.00048252876435748407,
|
| 49884 |
+
"loss": 14.4229,
|
| 49885 |
+
"step": 71250
|
| 49886 |
+
},
|
| 49887 |
+
{
|
| 49888 |
+
"epoch": 0.10555848526684403,
|
| 49889 |
+
"grad_norm": 6.28125,
|
| 49890 |
+
"learning_rate": 0.00048252629488998027,
|
| 49891 |
+
"loss": 14.44,
|
| 49892 |
+
"step": 71260
|
| 49893 |
+
},
|
| 49894 |
+
{
|
| 49895 |
+
"epoch": 0.10557329841380822,
|
| 49896 |
+
"grad_norm": 6.21875,
|
| 49897 |
+
"learning_rate": 0.0004825238254224765,
|
| 49898 |
+
"loss": 14.3657,
|
| 49899 |
+
"step": 71270
|
| 49900 |
+
},
|
| 49901 |
+
{
|
| 49902 |
+
"epoch": 0.10558811156077241,
|
| 49903 |
+
"grad_norm": 13.9375,
|
| 49904 |
+
"learning_rate": 0.0004825213559549727,
|
| 49905 |
+
"loss": 14.4594,
|
| 49906 |
+
"step": 71280
|
| 49907 |
+
},
|
| 49908 |
+
{
|
| 49909 |
+
"epoch": 0.1056029247077366,
|
| 49910 |
+
"grad_norm": 8.3125,
|
| 49911 |
+
"learning_rate": 0.00048251888648746897,
|
| 49912 |
+
"loss": 14.4768,
|
| 49913 |
+
"step": 71290
|
| 49914 |
+
},
|
| 49915 |
+
{
|
| 49916 |
+
"epoch": 0.1056177378547008,
|
| 49917 |
+
"grad_norm": 11.625,
|
| 49918 |
+
"learning_rate": 0.00048251641701996516,
|
| 49919 |
+
"loss": 14.4579,
|
| 49920 |
+
"step": 71300
|
| 49921 |
+
},
|
| 49922 |
+
{
|
| 49923 |
+
"epoch": 0.10563255100166499,
|
| 49924 |
+
"grad_norm": 10.875,
|
| 49925 |
+
"learning_rate": 0.00048251394755246136,
|
| 49926 |
+
"loss": 14.461,
|
| 49927 |
+
"step": 71310
|
| 49928 |
+
},
|
| 49929 |
+
{
|
| 49930 |
+
"epoch": 0.10564736414862919,
|
| 49931 |
+
"grad_norm": 8.0,
|
| 49932 |
+
"learning_rate": 0.0004825114780849576,
|
| 49933 |
+
"loss": 14.3306,
|
| 49934 |
+
"step": 71320
|
| 49935 |
+
},
|
| 49936 |
+
{
|
| 49937 |
+
"epoch": 0.10566217729559338,
|
| 49938 |
+
"grad_norm": 9.1875,
|
| 49939 |
+
"learning_rate": 0.00048250900861745386,
|
| 49940 |
+
"loss": 14.4295,
|
| 49941 |
+
"step": 71330
|
| 49942 |
+
},
|
| 49943 |
+
{
|
| 49944 |
+
"epoch": 0.10567699044255757,
|
| 49945 |
+
"grad_norm": 7.84375,
|
| 49946 |
+
"learning_rate": 0.00048250653914995,
|
| 49947 |
+
"loss": 14.3986,
|
| 49948 |
+
"step": 71340
|
| 49949 |
+
},
|
| 49950 |
+
{
|
| 49951 |
+
"epoch": 0.10569180358952177,
|
| 49952 |
+
"grad_norm": 6.3125,
|
| 49953 |
+
"learning_rate": 0.00048250406968244626,
|
| 49954 |
+
"loss": 14.452,
|
| 49955 |
+
"step": 71350
|
| 49956 |
+
},
|
| 49957 |
+
{
|
| 49958 |
+
"epoch": 0.10570661673648597,
|
| 49959 |
+
"grad_norm": 7.125,
|
| 49960 |
+
"learning_rate": 0.00048250160021494245,
|
| 49961 |
+
"loss": 14.417,
|
| 49962 |
+
"step": 71360
|
| 49963 |
+
},
|
| 49964 |
+
{
|
| 49965 |
+
"epoch": 0.10572142988345017,
|
| 49966 |
+
"grad_norm": 7.53125,
|
| 49967 |
+
"learning_rate": 0.00048249913074743865,
|
| 49968 |
+
"loss": 14.3807,
|
| 49969 |
+
"step": 71370
|
| 49970 |
+
},
|
| 49971 |
+
{
|
| 49972 |
+
"epoch": 0.10573624303041436,
|
| 49973 |
+
"grad_norm": 5.65625,
|
| 49974 |
+
"learning_rate": 0.0004824966612799349,
|
| 49975 |
+
"loss": 14.4175,
|
| 49976 |
+
"step": 71380
|
| 49977 |
+
},
|
| 49978 |
+
{
|
| 49979 |
+
"epoch": 0.10575105617737855,
|
| 49980 |
+
"grad_norm": 6.125,
|
| 49981 |
+
"learning_rate": 0.0004824941918124311,
|
| 49982 |
+
"loss": 14.3421,
|
| 49983 |
+
"step": 71390
|
| 49984 |
+
},
|
| 49985 |
+
{
|
| 49986 |
+
"epoch": 0.10576586932434275,
|
| 49987 |
+
"grad_norm": 5.6875,
|
| 49988 |
+
"learning_rate": 0.00048249172234492735,
|
| 49989 |
+
"loss": 14.4374,
|
| 49990 |
+
"step": 71400
|
| 49991 |
+
},
|
| 49992 |
+
{
|
| 49993 |
+
"epoch": 0.10578068247130694,
|
| 49994 |
+
"grad_norm": 6.1875,
|
| 49995 |
+
"learning_rate": 0.00048248925287742354,
|
| 49996 |
+
"loss": 14.3676,
|
| 49997 |
+
"step": 71410
|
| 49998 |
+
},
|
| 49999 |
+
{
|
| 50000 |
+
"epoch": 0.10579549561827113,
|
| 50001 |
+
"grad_norm": 7.53125,
|
| 50002 |
+
"learning_rate": 0.00048248678340991974,
|
| 50003 |
+
"loss": 14.426,
|
| 50004 |
+
"step": 71420
|
| 50005 |
+
},
|
| 50006 |
+
{
|
| 50007 |
+
"epoch": 0.10581030876523533,
|
| 50008 |
+
"grad_norm": 7.96875,
|
| 50009 |
+
"learning_rate": 0.000482484313942416,
|
| 50010 |
+
"loss": 14.4346,
|
| 50011 |
+
"step": 71430
|
| 50012 |
+
},
|
| 50013 |
+
{
|
| 50014 |
+
"epoch": 0.10582512191219952,
|
| 50015 |
+
"grad_norm": 5.875,
|
| 50016 |
+
"learning_rate": 0.0004824818444749122,
|
| 50017 |
+
"loss": 14.4111,
|
| 50018 |
+
"step": 71440
|
| 50019 |
+
},
|
| 50020 |
+
{
|
| 50021 |
+
"epoch": 0.10583993505916371,
|
| 50022 |
+
"grad_norm": 6.375,
|
| 50023 |
+
"learning_rate": 0.0004824793750074084,
|
| 50024 |
+
"loss": 14.3728,
|
| 50025 |
+
"step": 71450
|
| 50026 |
+
},
|
| 50027 |
+
{
|
| 50028 |
+
"epoch": 0.1058547482061279,
|
| 50029 |
+
"grad_norm": 6.96875,
|
| 50030 |
+
"learning_rate": 0.00048247690553990464,
|
| 50031 |
+
"loss": 14.4942,
|
| 50032 |
+
"step": 71460
|
| 50033 |
+
},
|
| 50034 |
+
{
|
| 50035 |
+
"epoch": 0.1058695613530921,
|
| 50036 |
+
"grad_norm": 6.125,
|
| 50037 |
+
"learning_rate": 0.00048247443607240083,
|
| 50038 |
+
"loss": 14.4995,
|
| 50039 |
+
"step": 71470
|
| 50040 |
+
},
|
| 50041 |
+
{
|
| 50042 |
+
"epoch": 0.10588437450005629,
|
| 50043 |
+
"grad_norm": 5.65625,
|
| 50044 |
+
"learning_rate": 0.0004824719666048971,
|
| 50045 |
+
"loss": 14.4345,
|
| 50046 |
+
"step": 71480
|
| 50047 |
+
},
|
| 50048 |
+
{
|
| 50049 |
+
"epoch": 0.10589918764702048,
|
| 50050 |
+
"grad_norm": 6.53125,
|
| 50051 |
+
"learning_rate": 0.0004824694971373933,
|
| 50052 |
+
"loss": 14.41,
|
| 50053 |
+
"step": 71490
|
| 50054 |
+
},
|
| 50055 |
+
{
|
| 50056 |
+
"epoch": 0.10591400079398468,
|
| 50057 |
+
"grad_norm": 7.78125,
|
| 50058 |
+
"learning_rate": 0.0004824670276698895,
|
| 50059 |
+
"loss": 14.4169,
|
| 50060 |
+
"step": 71500
|
| 50061 |
+
},
|
| 50062 |
+
{
|
| 50063 |
+
"epoch": 0.10592881394094887,
|
| 50064 |
+
"grad_norm": 6.6875,
|
| 50065 |
+
"learning_rate": 0.00048246455820238573,
|
| 50066 |
+
"loss": 14.5034,
|
| 50067 |
+
"step": 71510
|
| 50068 |
+
},
|
| 50069 |
+
{
|
| 50070 |
+
"epoch": 0.10594362708791306,
|
| 50071 |
+
"grad_norm": 6.34375,
|
| 50072 |
+
"learning_rate": 0.000482462088734882,
|
| 50073 |
+
"loss": 14.3576,
|
| 50074 |
+
"step": 71520
|
| 50075 |
+
},
|
| 50076 |
+
{
|
| 50077 |
+
"epoch": 0.10595844023487726,
|
| 50078 |
+
"grad_norm": 134.0,
|
| 50079 |
+
"learning_rate": 0.0004824596192673781,
|
| 50080 |
+
"loss": 14.3604,
|
| 50081 |
+
"step": 71530
|
| 50082 |
+
},
|
| 50083 |
+
{
|
| 50084 |
+
"epoch": 0.10597325338184145,
|
| 50085 |
+
"grad_norm": 6.1875,
|
| 50086 |
+
"learning_rate": 0.0004824571497998744,
|
| 50087 |
+
"loss": 14.4725,
|
| 50088 |
+
"step": 71540
|
| 50089 |
+
},
|
| 50090 |
+
{
|
| 50091 |
+
"epoch": 0.10598806652880564,
|
| 50092 |
+
"grad_norm": 6.71875,
|
| 50093 |
+
"learning_rate": 0.00048245468033237057,
|
| 50094 |
+
"loss": 14.3752,
|
| 50095 |
+
"step": 71550
|
| 50096 |
+
},
|
| 50097 |
+
{
|
| 50098 |
+
"epoch": 0.10600287967576984,
|
| 50099 |
+
"grad_norm": 6.34375,
|
| 50100 |
+
"learning_rate": 0.00048245221086486677,
|
| 50101 |
+
"loss": 14.3763,
|
| 50102 |
+
"step": 71560
|
| 50103 |
+
},
|
| 50104 |
+
{
|
| 50105 |
+
"epoch": 0.10601769282273403,
|
| 50106 |
+
"grad_norm": 6.34375,
|
| 50107 |
+
"learning_rate": 0.000482449741397363,
|
| 50108 |
+
"loss": 14.3196,
|
| 50109 |
+
"step": 71570
|
| 50110 |
+
},
|
| 50111 |
+
{
|
| 50112 |
+
"epoch": 0.10603250596969822,
|
| 50113 |
+
"grad_norm": 7.71875,
|
| 50114 |
+
"learning_rate": 0.0004824472719298592,
|
| 50115 |
+
"loss": 14.3504,
|
| 50116 |
+
"step": 71580
|
| 50117 |
+
},
|
| 50118 |
+
{
|
| 50119 |
+
"epoch": 0.10604731911666242,
|
| 50120 |
+
"grad_norm": 8.25,
|
| 50121 |
+
"learning_rate": 0.00048244480246235547,
|
| 50122 |
+
"loss": 14.3716,
|
| 50123 |
+
"step": 71590
|
| 50124 |
+
},
|
| 50125 |
+
{
|
| 50126 |
+
"epoch": 0.10606213226362661,
|
| 50127 |
+
"grad_norm": 6.09375,
|
| 50128 |
+
"learning_rate": 0.00048244233299485166,
|
| 50129 |
+
"loss": 14.384,
|
| 50130 |
+
"step": 71600
|
| 50131 |
+
},
|
| 50132 |
+
{
|
| 50133 |
+
"epoch": 0.1060769454105908,
|
| 50134 |
+
"grad_norm": 7.9375,
|
| 50135 |
+
"learning_rate": 0.00048243986352734786,
|
| 50136 |
+
"loss": 14.4166,
|
| 50137 |
+
"step": 71610
|
| 50138 |
+
},
|
| 50139 |
+
{
|
| 50140 |
+
"epoch": 0.106091758557555,
|
| 50141 |
+
"grad_norm": 6.6875,
|
| 50142 |
+
"learning_rate": 0.0004824373940598441,
|
| 50143 |
+
"loss": 14.4795,
|
| 50144 |
+
"step": 71620
|
| 50145 |
+
},
|
| 50146 |
+
{
|
| 50147 |
+
"epoch": 0.10610657170451919,
|
| 50148 |
+
"grad_norm": 7.375,
|
| 50149 |
+
"learning_rate": 0.00048243492459234036,
|
| 50150 |
+
"loss": 14.5364,
|
| 50151 |
+
"step": 71630
|
| 50152 |
+
},
|
| 50153 |
+
{
|
| 50154 |
+
"epoch": 0.10612138485148338,
|
| 50155 |
+
"grad_norm": 6.375,
|
| 50156 |
+
"learning_rate": 0.0004824324551248365,
|
| 50157 |
+
"loss": 14.3576,
|
| 50158 |
+
"step": 71640
|
| 50159 |
+
},
|
| 50160 |
+
{
|
| 50161 |
+
"epoch": 0.10613619799844758,
|
| 50162 |
+
"grad_norm": 5.90625,
|
| 50163 |
+
"learning_rate": 0.00048242998565733276,
|
| 50164 |
+
"loss": 14.3811,
|
| 50165 |
+
"step": 71650
|
| 50166 |
+
},
|
| 50167 |
+
{
|
| 50168 |
+
"epoch": 0.10615101114541177,
|
| 50169 |
+
"grad_norm": 6.75,
|
| 50170 |
+
"learning_rate": 0.00048242751618982895,
|
| 50171 |
+
"loss": 14.3575,
|
| 50172 |
+
"step": 71660
|
| 50173 |
+
},
|
| 50174 |
+
{
|
| 50175 |
+
"epoch": 0.10616582429237596,
|
| 50176 |
+
"grad_norm": 7.09375,
|
| 50177 |
+
"learning_rate": 0.0004824250467223252,
|
| 50178 |
+
"loss": 14.427,
|
| 50179 |
+
"step": 71670
|
| 50180 |
+
},
|
| 50181 |
+
{
|
| 50182 |
+
"epoch": 0.10618063743934017,
|
| 50183 |
+
"grad_norm": 6.71875,
|
| 50184 |
+
"learning_rate": 0.0004824225772548214,
|
| 50185 |
+
"loss": 14.417,
|
| 50186 |
+
"step": 71680
|
| 50187 |
+
},
|
| 50188 |
+
{
|
| 50189 |
+
"epoch": 0.10619545058630436,
|
| 50190 |
+
"grad_norm": 5.8125,
|
| 50191 |
+
"learning_rate": 0.0004824201077873176,
|
| 50192 |
+
"loss": 14.2939,
|
| 50193 |
+
"step": 71690
|
| 50194 |
+
},
|
| 50195 |
+
{
|
| 50196 |
+
"epoch": 0.10621026373326856,
|
| 50197 |
+
"grad_norm": 12.25,
|
| 50198 |
+
"learning_rate": 0.00048241763831981385,
|
| 50199 |
+
"loss": 14.4262,
|
| 50200 |
+
"step": 71700
|
| 50201 |
+
},
|
| 50202 |
+
{
|
| 50203 |
+
"epoch": 0.10622507688023275,
|
| 50204 |
+
"grad_norm": 7.09375,
|
| 50205 |
+
"learning_rate": 0.00048241516885231005,
|
| 50206 |
+
"loss": 14.4784,
|
| 50207 |
+
"step": 71710
|
| 50208 |
+
},
|
| 50209 |
+
{
|
| 50210 |
+
"epoch": 0.10623989002719694,
|
| 50211 |
+
"grad_norm": 6.53125,
|
| 50212 |
+
"learning_rate": 0.00048241269938480624,
|
| 50213 |
+
"loss": 14.4509,
|
| 50214 |
+
"step": 71720
|
| 50215 |
+
},
|
| 50216 |
+
{
|
| 50217 |
+
"epoch": 0.10625470317416114,
|
| 50218 |
+
"grad_norm": 8.875,
|
| 50219 |
+
"learning_rate": 0.0004824102299173025,
|
| 50220 |
+
"loss": 14.383,
|
| 50221 |
+
"step": 71730
|
| 50222 |
+
},
|
| 50223 |
+
{
|
| 50224 |
+
"epoch": 0.10626951632112533,
|
| 50225 |
+
"grad_norm": 5.8125,
|
| 50226 |
+
"learning_rate": 0.0004824077604497987,
|
| 50227 |
+
"loss": 14.4117,
|
| 50228 |
+
"step": 71740
|
| 50229 |
+
},
|
| 50230 |
+
{
|
| 50231 |
+
"epoch": 0.10628432946808952,
|
| 50232 |
+
"grad_norm": 5.1875,
|
| 50233 |
+
"learning_rate": 0.0004824052909822949,
|
| 50234 |
+
"loss": 14.467,
|
| 50235 |
+
"step": 71750
|
| 50236 |
+
},
|
| 50237 |
+
{
|
| 50238 |
+
"epoch": 0.10629914261505372,
|
| 50239 |
+
"grad_norm": 8.125,
|
| 50240 |
+
"learning_rate": 0.00048240282151479114,
|
| 50241 |
+
"loss": 14.392,
|
| 50242 |
+
"step": 71760
|
| 50243 |
+
},
|
| 50244 |
+
{
|
| 50245 |
+
"epoch": 0.10631395576201791,
|
| 50246 |
+
"grad_norm": 5.90625,
|
| 50247 |
+
"learning_rate": 0.00048240035204728733,
|
| 50248 |
+
"loss": 14.335,
|
| 50249 |
+
"step": 71770
|
| 50250 |
+
},
|
| 50251 |
+
{
|
| 50252 |
+
"epoch": 0.1063287689089821,
|
| 50253 |
+
"grad_norm": 5.84375,
|
| 50254 |
+
"learning_rate": 0.0004823978825797836,
|
| 50255 |
+
"loss": 14.3728,
|
| 50256 |
+
"step": 71780
|
| 50257 |
+
},
|
| 50258 |
+
{
|
| 50259 |
+
"epoch": 0.1063435820559463,
|
| 50260 |
+
"grad_norm": 5.5625,
|
| 50261 |
+
"learning_rate": 0.0004823954131122798,
|
| 50262 |
+
"loss": 14.4237,
|
| 50263 |
+
"step": 71790
|
| 50264 |
+
},
|
| 50265 |
+
{
|
| 50266 |
+
"epoch": 0.10635839520291049,
|
| 50267 |
+
"grad_norm": 5.9375,
|
| 50268 |
+
"learning_rate": 0.000482392943644776,
|
| 50269 |
+
"loss": 14.3389,
|
| 50270 |
+
"step": 71800
|
| 50271 |
+
},
|
| 50272 |
+
{
|
| 50273 |
+
"epoch": 0.10637320834987468,
|
| 50274 |
+
"grad_norm": 5.8125,
|
| 50275 |
+
"learning_rate": 0.00048239047417727223,
|
| 50276 |
+
"loss": 14.2562,
|
| 50277 |
+
"step": 71810
|
| 50278 |
+
},
|
| 50279 |
+
{
|
| 50280 |
+
"epoch": 0.10638802149683887,
|
| 50281 |
+
"grad_norm": 6.46875,
|
| 50282 |
+
"learning_rate": 0.0004823880047097685,
|
| 50283 |
+
"loss": 14.4121,
|
| 50284 |
+
"step": 71820
|
| 50285 |
+
},
|
| 50286 |
+
{
|
| 50287 |
+
"epoch": 0.10640283464380307,
|
| 50288 |
+
"grad_norm": 14.875,
|
| 50289 |
+
"learning_rate": 0.0004823855352422646,
|
| 50290 |
+
"loss": 14.5303,
|
| 50291 |
+
"step": 71830
|
| 50292 |
+
},
|
| 50293 |
+
{
|
| 50294 |
+
"epoch": 0.10641764779076726,
|
| 50295 |
+
"grad_norm": 37.75,
|
| 50296 |
+
"learning_rate": 0.0004823830657747609,
|
| 50297 |
+
"loss": 14.36,
|
| 50298 |
+
"step": 71840
|
| 50299 |
+
},
|
| 50300 |
+
{
|
| 50301 |
+
"epoch": 0.10643246093773145,
|
| 50302 |
+
"grad_norm": 6.3125,
|
| 50303 |
+
"learning_rate": 0.00048238059630725707,
|
| 50304 |
+
"loss": 14.332,
|
| 50305 |
+
"step": 71850
|
| 50306 |
+
},
|
| 50307 |
+
{
|
| 50308 |
+
"epoch": 0.10644727408469565,
|
| 50309 |
+
"grad_norm": 10.1875,
|
| 50310 |
+
"learning_rate": 0.00048237812683975327,
|
| 50311 |
+
"loss": 14.356,
|
| 50312 |
+
"step": 71860
|
| 50313 |
+
},
|
| 50314 |
+
{
|
| 50315 |
+
"epoch": 0.10646208723165984,
|
| 50316 |
+
"grad_norm": 7.71875,
|
| 50317 |
+
"learning_rate": 0.0004823756573722495,
|
| 50318 |
+
"loss": 14.4466,
|
| 50319 |
+
"step": 71870
|
| 50320 |
+
},
|
| 50321 |
+
{
|
| 50322 |
+
"epoch": 0.10647690037862403,
|
| 50323 |
+
"grad_norm": 6.0,
|
| 50324 |
+
"learning_rate": 0.0004823731879047457,
|
| 50325 |
+
"loss": 14.3945,
|
| 50326 |
+
"step": 71880
|
| 50327 |
+
},
|
| 50328 |
+
{
|
| 50329 |
+
"epoch": 0.10649171352558823,
|
| 50330 |
+
"grad_norm": 22.875,
|
| 50331 |
+
"learning_rate": 0.00048237071843724197,
|
| 50332 |
+
"loss": 14.3465,
|
| 50333 |
+
"step": 71890
|
| 50334 |
+
},
|
| 50335 |
+
{
|
| 50336 |
+
"epoch": 0.10650652667255242,
|
| 50337 |
+
"grad_norm": 8.6875,
|
| 50338 |
+
"learning_rate": 0.00048236824896973816,
|
| 50339 |
+
"loss": 14.4183,
|
| 50340 |
+
"step": 71900
|
| 50341 |
+
},
|
| 50342 |
+
{
|
| 50343 |
+
"epoch": 0.10652133981951661,
|
| 50344 |
+
"grad_norm": 6.71875,
|
| 50345 |
+
"learning_rate": 0.00048236577950223436,
|
| 50346 |
+
"loss": 14.3305,
|
| 50347 |
+
"step": 71910
|
| 50348 |
+
},
|
| 50349 |
+
{
|
| 50350 |
+
"epoch": 0.10653615296648081,
|
| 50351 |
+
"grad_norm": 6.25,
|
| 50352 |
+
"learning_rate": 0.0004823633100347306,
|
| 50353 |
+
"loss": 14.3703,
|
| 50354 |
+
"step": 71920
|
| 50355 |
+
},
|
| 50356 |
+
{
|
| 50357 |
+
"epoch": 0.106550966113445,
|
| 50358 |
+
"grad_norm": 6.71875,
|
| 50359 |
+
"learning_rate": 0.00048236084056722686,
|
| 50360 |
+
"loss": 14.3641,
|
| 50361 |
+
"step": 71930
|
| 50362 |
+
},
|
| 50363 |
+
{
|
| 50364 |
+
"epoch": 0.1065657792604092,
|
| 50365 |
+
"grad_norm": 7.875,
|
| 50366 |
+
"learning_rate": 0.000482358371099723,
|
| 50367 |
+
"loss": 14.3658,
|
| 50368 |
+
"step": 71940
|
| 50369 |
+
},
|
| 50370 |
+
{
|
| 50371 |
+
"epoch": 0.10658059240737339,
|
| 50372 |
+
"grad_norm": 8.0,
|
| 50373 |
+
"learning_rate": 0.00048235590163221926,
|
| 50374 |
+
"loss": 14.3796,
|
| 50375 |
+
"step": 71950
|
| 50376 |
+
},
|
| 50377 |
+
{
|
| 50378 |
+
"epoch": 0.10659540555433758,
|
| 50379 |
+
"grad_norm": 5.78125,
|
| 50380 |
+
"learning_rate": 0.00048235343216471545,
|
| 50381 |
+
"loss": 14.3714,
|
| 50382 |
+
"step": 71960
|
| 50383 |
+
},
|
| 50384 |
+
{
|
| 50385 |
+
"epoch": 0.10661021870130177,
|
| 50386 |
+
"grad_norm": 7.28125,
|
| 50387 |
+
"learning_rate": 0.0004823509626972117,
|
| 50388 |
+
"loss": 14.3345,
|
| 50389 |
+
"step": 71970
|
| 50390 |
+
},
|
| 50391 |
+
{
|
| 50392 |
+
"epoch": 0.10662503184826597,
|
| 50393 |
+
"grad_norm": 6.375,
|
| 50394 |
+
"learning_rate": 0.0004823484932297079,
|
| 50395 |
+
"loss": 14.4057,
|
| 50396 |
+
"step": 71980
|
| 50397 |
+
},
|
| 50398 |
+
{
|
| 50399 |
+
"epoch": 0.10663984499523017,
|
| 50400 |
+
"grad_norm": 5.65625,
|
| 50401 |
+
"learning_rate": 0.0004823460237622041,
|
| 50402 |
+
"loss": 14.4317,
|
| 50403 |
+
"step": 71990
|
| 50404 |
+
},
|
| 50405 |
+
{
|
| 50406 |
+
"epoch": 0.10665465814219437,
|
| 50407 |
+
"grad_norm": 6.5625,
|
| 50408 |
+
"learning_rate": 0.00048234355429470035,
|
| 50409 |
+
"loss": 14.3753,
|
| 50410 |
+
"step": 72000
|
| 50411 |
}
|
| 50412 |
],
|
| 50413 |
"logging_steps": 10,
|
|
|
|
| 50427 |
"attributes": {}
|
| 50428 |
}
|
| 50429 |
},
|
| 50430 |
+
"total_flos": 1.5559474665781566e+20,
|
| 50431 |
"train_batch_size": 48,
|
| 50432 |
"trial_name": null,
|
| 50433 |
"trial_params": null
|
last-checkpoint/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5432
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ddac1e1408e6de2b20509372421fa4e906815c8d1b6c37e768ca28dc8765d1b7
|
| 3 |
size 5432
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1032262338
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:50a9561c05504384f8a5358ec95beca3b2814433f472420dc733ce1dc363e842
|
| 3 |
size 1032262338
|
runs/Sep21_12-15-19_nid006869/events.out.tfevents.1758449727.nid006869.65351.0
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:155723adf5c79bfdce72ead24549f974e4742ac3c1d01e8da580a816cc303c13
|
| 3 |
+
size 307627
|
runs/Sep21_22-35-48_nid006862/events.out.tfevents.1758486957.nid006862.27495.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4ad8b8d13e4fdf23f8aaa7dd19ecd65b8076254f307ffc7f4923ad34e4c5c050
|
| 3 |
+
size 91337
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5432
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ddac1e1408e6de2b20509372421fa4e906815c8d1b6c37e768ca28dc8765d1b7
|
| 3 |
size 5432
|