Instructions to use kurtpayne/skillscan-deberta-adapter with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use kurtpayne/skillscan-deberta-adapter with PEFT:
```python
from peft import PeftModel
from transformers import AutoModelForSequenceClassification

base_model = AutoModelForSequenceClassification.from_pretrained("answerdotai/ModernBERT-base")
model = PeftModel.from_pretrained(base_model, "kurtpayne/skillscan-deberta-adapter")
```
- Transformers
How to use kurtpayne/skillscan-deberta-adapter with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-classification", model="kurtpayne/skillscan-deberta-adapter")
```
```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForSequenceClassification

tokenizer = AutoTokenizer.from_pretrained("kurtpayne/skillscan-deberta-adapter")
model = AutoModelForSequenceClassification.from_pretrained("kurtpayne/skillscan-deberta-adapter")
```
- Notebooks
- Google Colab
- Kaggle
Training in progress, epoch 6, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 41326816
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:112bc0f3b1b2dc5eb8f630066e354cf482957d3ac748abe391ee31395fad762e
|
| 3 |
size 41326816
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 82710219
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0024a85a2b2d0da2a5ffb75534854bdb966b34ff8920d045a44b4d4e6d074b04
|
| 3 |
size 82710219
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ac86114ab63a824512c8a12cd10875b7e54b744aa8a31b22b1e280aabb9491a0
|
| 3 |
size 14645
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1383
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c4c83bb7defda8c3f341201fe6246d0e1cf73bcb0df11b3ed7236d93a2e62019
|
| 3 |
size 1383
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:65322c8c832ca6bf70d61640f5bbef4c3ecd471fce4e418dae14f76a3034d8bd
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"best_global_step":
|
| 3 |
-
"best_metric": 0.
|
| 4 |
-
"best_model_checkpoint": "/tmp/tmpxojw69__/adapter-multilabel/checkpoint-
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -8959,6 +8959,1799 @@
|
|
| 8959 |
"eval_samples_per_second": 200.007,
|
| 8960 |
"eval_steps_per_second": 25.001,
|
| 8961 |
"step": 12735
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8962 |
}
|
| 8963 |
],
|
| 8964 |
"logging_steps": 10,
|
|
@@ -8973,7 +10766,7 @@
|
|
| 8973 |
"early_stopping_threshold": 0.0
|
| 8974 |
},
|
| 8975 |
"attributes": {
|
| 8976 |
-
"early_stopping_patience_counter":
|
| 8977 |
}
|
| 8978 |
},
|
| 8979 |
"TrainerControl": {
|
|
@@ -8982,12 +10775,12 @@
|
|
| 8982 |
"should_evaluate": false,
|
| 8983 |
"should_log": false,
|
| 8984 |
"should_save": true,
|
| 8985 |
-
"should_training_stop":
|
| 8986 |
},
|
| 8987 |
"attributes": {}
|
| 8988 |
}
|
| 8989 |
},
|
| 8990 |
-
"total_flos":
|
| 8991 |
"train_batch_size": 8,
|
| 8992 |
"trial_name": null,
|
| 8993 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_global_step": 15282,
|
| 3 |
+
"best_metric": 0.19693690538406372,
|
| 4 |
+
"best_model_checkpoint": "/tmp/tmpxojw69__/adapter-multilabel/checkpoint-15282",
|
| 5 |
+
"epoch": 6.0,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 15282,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 8959 |
"eval_samples_per_second": 200.007,
|
| 8960 |
"eval_steps_per_second": 25.001,
|
| 8961 |
"step": 12735
|
| 8962 |
+
},
|
| 8963 |
+
{
|
| 8964 |
+
"epoch": 5.001963093835886,
|
| 8965 |
+
"grad_norm": 1.4495255947113037,
|
| 8966 |
+
"learning_rate": 2.773576674180179e-06,
|
| 8967 |
+
"loss": 0.0809,
|
| 8968 |
+
"step": 12740
|
| 8969 |
+
},
|
| 8970 |
+
{
|
| 8971 |
+
"epoch": 5.005889281507656,
|
| 8972 |
+
"grad_norm": 1.010282039642334,
|
| 8973 |
+
"learning_rate": 2.7626699629171816e-06,
|
| 8974 |
+
"loss": 0.0899,
|
| 8975 |
+
"step": 12750
|
| 8976 |
+
},
|
| 8977 |
+
{
|
| 8978 |
+
"epoch": 5.0098154691794266,
|
| 8979 |
+
"grad_norm": 0.13367398083209991,
|
| 8980 |
+
"learning_rate": 2.7517632516541846e-06,
|
| 8981 |
+
"loss": 0.0142,
|
| 8982 |
+
"step": 12760
|
| 8983 |
+
},
|
| 8984 |
+
{
|
| 8985 |
+
"epoch": 5.013741656851198,
|
| 8986 |
+
"grad_norm": 17.02937126159668,
|
| 8987 |
+
"learning_rate": 2.740856540391187e-06,
|
| 8988 |
+
"loss": 0.3372,
|
| 8989 |
+
"step": 12770
|
| 8990 |
+
},
|
| 8991 |
+
{
|
| 8992 |
+
"epoch": 5.017667844522968,
|
| 8993 |
+
"grad_norm": 0.005055473186075687,
|
| 8994 |
+
"learning_rate": 2.7299498291281905e-06,
|
| 8995 |
+
"loss": 0.0114,
|
| 8996 |
+
"step": 12780
|
| 8997 |
+
},
|
| 8998 |
+
{
|
| 8999 |
+
"epoch": 5.021594032194739,
|
| 9000 |
+
"grad_norm": 0.22540545463562012,
|
| 9001 |
+
"learning_rate": 2.719043117865193e-06,
|
| 9002 |
+
"loss": 0.1835,
|
| 9003 |
+
"step": 12790
|
| 9004 |
+
},
|
| 9005 |
+
{
|
| 9006 |
+
"epoch": 5.025520219866509,
|
| 9007 |
+
"grad_norm": 9.437993049621582,
|
| 9008 |
+
"learning_rate": 2.708136406602196e-06,
|
| 9009 |
+
"loss": 0.0482,
|
| 9010 |
+
"step": 12800
|
| 9011 |
+
},
|
| 9012 |
+
{
|
| 9013 |
+
"epoch": 5.0294464075382805,
|
| 9014 |
+
"grad_norm": 0.912726879119873,
|
| 9015 |
+
"learning_rate": 2.6972296953391986e-06,
|
| 9016 |
+
"loss": 0.0343,
|
| 9017 |
+
"step": 12810
|
| 9018 |
+
},
|
| 9019 |
+
{
|
| 9020 |
+
"epoch": 5.033372595210051,
|
| 9021 |
+
"grad_norm": 0.3685736656188965,
|
| 9022 |
+
"learning_rate": 2.6863229840762016e-06,
|
| 9023 |
+
"loss": 0.0036,
|
| 9024 |
+
"step": 12820
|
| 9025 |
+
},
|
| 9026 |
+
{
|
| 9027 |
+
"epoch": 5.037298782881821,
|
| 9028 |
+
"grad_norm": 0.060967061668634415,
|
| 9029 |
+
"learning_rate": 2.6754162728132046e-06,
|
| 9030 |
+
"loss": 0.0128,
|
| 9031 |
+
"step": 12830
|
| 9032 |
+
},
|
| 9033 |
+
{
|
| 9034 |
+
"epoch": 5.041224970553593,
|
| 9035 |
+
"grad_norm": 0.17684897780418396,
|
| 9036 |
+
"learning_rate": 2.664509561550207e-06,
|
| 9037 |
+
"loss": 0.0706,
|
| 9038 |
+
"step": 12840
|
| 9039 |
+
},
|
| 9040 |
+
{
|
| 9041 |
+
"epoch": 5.045151158225363,
|
| 9042 |
+
"grad_norm": 0.45533451437950134,
|
| 9043 |
+
"learning_rate": 2.6536028502872105e-06,
|
| 9044 |
+
"loss": 0.1257,
|
| 9045 |
+
"step": 12850
|
| 9046 |
+
},
|
| 9047 |
+
{
|
| 9048 |
+
"epoch": 5.049077345897134,
|
| 9049 |
+
"grad_norm": 23.107601165771484,
|
| 9050 |
+
"learning_rate": 2.642696139024213e-06,
|
| 9051 |
+
"loss": 0.1421,
|
| 9052 |
+
"step": 12860
|
| 9053 |
+
},
|
| 9054 |
+
{
|
| 9055 |
+
"epoch": 5.053003533568905,
|
| 9056 |
+
"grad_norm": 0.9842678308486938,
|
| 9057 |
+
"learning_rate": 2.631789427761216e-06,
|
| 9058 |
+
"loss": 0.0861,
|
| 9059 |
+
"step": 12870
|
| 9060 |
+
},
|
| 9061 |
+
{
|
| 9062 |
+
"epoch": 5.056929721240675,
|
| 9063 |
+
"grad_norm": 0.4415569305419922,
|
| 9064 |
+
"learning_rate": 2.6208827164982186e-06,
|
| 9065 |
+
"loss": 0.0216,
|
| 9066 |
+
"step": 12880
|
| 9067 |
+
},
|
| 9068 |
+
{
|
| 9069 |
+
"epoch": 5.060855908912446,
|
| 9070 |
+
"grad_norm": 0.09013790637254715,
|
| 9071 |
+
"learning_rate": 2.6099760052352216e-06,
|
| 9072 |
+
"loss": 0.3773,
|
| 9073 |
+
"step": 12890
|
| 9074 |
+
},
|
| 9075 |
+
{
|
| 9076 |
+
"epoch": 5.064782096584216,
|
| 9077 |
+
"grad_norm": 2.492323875427246,
|
| 9078 |
+
"learning_rate": 2.599069293972224e-06,
|
| 9079 |
+
"loss": 0.3093,
|
| 9080 |
+
"step": 12900
|
| 9081 |
+
},
|
| 9082 |
+
{
|
| 9083 |
+
"epoch": 5.068708284255988,
|
| 9084 |
+
"grad_norm": 0.0014120221603661776,
|
| 9085 |
+
"learning_rate": 2.588162582709227e-06,
|
| 9086 |
+
"loss": 0.2295,
|
| 9087 |
+
"step": 12910
|
| 9088 |
+
},
|
| 9089 |
+
{
|
| 9090 |
+
"epoch": 5.072634471927758,
|
| 9091 |
+
"grad_norm": 0.6903073191642761,
|
| 9092 |
+
"learning_rate": 2.5772558714462297e-06,
|
| 9093 |
+
"loss": 0.0299,
|
| 9094 |
+
"step": 12920
|
| 9095 |
+
},
|
| 9096 |
+
{
|
| 9097 |
+
"epoch": 5.0765606595995285,
|
| 9098 |
+
"grad_norm": 0.5149281024932861,
|
| 9099 |
+
"learning_rate": 2.566349160183233e-06,
|
| 9100 |
+
"loss": 0.0038,
|
| 9101 |
+
"step": 12930
|
| 9102 |
+
},
|
| 9103 |
+
{
|
| 9104 |
+
"epoch": 5.0804868472713,
|
| 9105 |
+
"grad_norm": 0.1371137499809265,
|
| 9106 |
+
"learning_rate": 2.5554424489202356e-06,
|
| 9107 |
+
"loss": 0.2088,
|
| 9108 |
+
"step": 12940
|
| 9109 |
+
},
|
| 9110 |
+
{
|
| 9111 |
+
"epoch": 5.08441303494307,
|
| 9112 |
+
"grad_norm": 70.09143829345703,
|
| 9113 |
+
"learning_rate": 2.5445357376572386e-06,
|
| 9114 |
+
"loss": 0.0551,
|
| 9115 |
+
"step": 12950
|
| 9116 |
+
},
|
| 9117 |
+
{
|
| 9118 |
+
"epoch": 5.088339222614841,
|
| 9119 |
+
"grad_norm": 0.018704677000641823,
|
| 9120 |
+
"learning_rate": 2.533629026394241e-06,
|
| 9121 |
+
"loss": 0.3212,
|
| 9122 |
+
"step": 12960
|
| 9123 |
+
},
|
| 9124 |
+
{
|
| 9125 |
+
"epoch": 5.092265410286612,
|
| 9126 |
+
"grad_norm": 5.903232574462891,
|
| 9127 |
+
"learning_rate": 2.522722315131244e-06,
|
| 9128 |
+
"loss": 0.2351,
|
| 9129 |
+
"step": 12970
|
| 9130 |
+
},
|
| 9131 |
+
{
|
| 9132 |
+
"epoch": 5.0961915979583825,
|
| 9133 |
+
"grad_norm": 0.45784613490104675,
|
| 9134 |
+
"learning_rate": 2.511815603868247e-06,
|
| 9135 |
+
"loss": 0.1325,
|
| 9136 |
+
"step": 12980
|
| 9137 |
+
},
|
| 9138 |
+
{
|
| 9139 |
+
"epoch": 5.100117785630153,
|
| 9140 |
+
"grad_norm": 0.3939746916294098,
|
| 9141 |
+
"learning_rate": 2.5009088926052496e-06,
|
| 9142 |
+
"loss": 0.0646,
|
| 9143 |
+
"step": 12990
|
| 9144 |
+
},
|
| 9145 |
+
{
|
| 9146 |
+
"epoch": 5.104043973301923,
|
| 9147 |
+
"grad_norm": 0.36335885524749756,
|
| 9148 |
+
"learning_rate": 2.490002181342253e-06,
|
| 9149 |
+
"loss": 0.2026,
|
| 9150 |
+
"step": 13000
|
| 9151 |
+
},
|
| 9152 |
+
{
|
| 9153 |
+
"epoch": 5.107970160973695,
|
| 9154 |
+
"grad_norm": 4.986391544342041,
|
| 9155 |
+
"learning_rate": 2.4790954700792556e-06,
|
| 9156 |
+
"loss": 0.1158,
|
| 9157 |
+
"step": 13010
|
| 9158 |
+
},
|
| 9159 |
+
{
|
| 9160 |
+
"epoch": 5.111896348645465,
|
| 9161 |
+
"grad_norm": 0.18488813936710358,
|
| 9162 |
+
"learning_rate": 2.4681887588162586e-06,
|
| 9163 |
+
"loss": 0.0048,
|
| 9164 |
+
"step": 13020
|
| 9165 |
+
},
|
| 9166 |
+
{
|
| 9167 |
+
"epoch": 5.115822536317236,
|
| 9168 |
+
"grad_norm": 3.7534120082855225,
|
| 9169 |
+
"learning_rate": 2.457282047553261e-06,
|
| 9170 |
+
"loss": 0.0204,
|
| 9171 |
+
"step": 13030
|
| 9172 |
+
},
|
| 9173 |
+
{
|
| 9174 |
+
"epoch": 5.119748723989007,
|
| 9175 |
+
"grad_norm": 0.1561623513698578,
|
| 9176 |
+
"learning_rate": 2.446375336290264e-06,
|
| 9177 |
+
"loss": 0.0287,
|
| 9178 |
+
"step": 13040
|
| 9179 |
+
},
|
| 9180 |
+
{
|
| 9181 |
+
"epoch": 5.123674911660777,
|
| 9182 |
+
"grad_norm": 0.003959027584642172,
|
| 9183 |
+
"learning_rate": 2.4354686250272667e-06,
|
| 9184 |
+
"loss": 0.1339,
|
| 9185 |
+
"step": 13050
|
| 9186 |
+
},
|
| 9187 |
+
{
|
| 9188 |
+
"epoch": 5.127601099332548,
|
| 9189 |
+
"grad_norm": 2.6318511962890625,
|
| 9190 |
+
"learning_rate": 2.4245619137642696e-06,
|
| 9191 |
+
"loss": 0.1513,
|
| 9192 |
+
"step": 13060
|
| 9193 |
+
},
|
| 9194 |
+
{
|
| 9195 |
+
"epoch": 5.131527287004319,
|
| 9196 |
+
"grad_norm": 23.85380744934082,
|
| 9197 |
+
"learning_rate": 2.413655202501272e-06,
|
| 9198 |
+
"loss": 0.0467,
|
| 9199 |
+
"step": 13070
|
| 9200 |
+
},
|
| 9201 |
+
{
|
| 9202 |
+
"epoch": 5.13545347467609,
|
| 9203 |
+
"grad_norm": 2.4958913326263428,
|
| 9204 |
+
"learning_rate": 2.4027484912382756e-06,
|
| 9205 |
+
"loss": 0.1685,
|
| 9206 |
+
"step": 13080
|
| 9207 |
+
},
|
| 9208 |
+
{
|
| 9209 |
+
"epoch": 5.13937966234786,
|
| 9210 |
+
"grad_norm": 4.380481719970703,
|
| 9211 |
+
"learning_rate": 2.391841779975278e-06,
|
| 9212 |
+
"loss": 0.0426,
|
| 9213 |
+
"step": 13090
|
| 9214 |
+
},
|
| 9215 |
+
{
|
| 9216 |
+
"epoch": 5.143305850019631,
|
| 9217 |
+
"grad_norm": 0.0490720272064209,
|
| 9218 |
+
"learning_rate": 2.380935068712281e-06,
|
| 9219 |
+
"loss": 0.1564,
|
| 9220 |
+
"step": 13100
|
| 9221 |
+
},
|
| 9222 |
+
{
|
| 9223 |
+
"epoch": 5.147232037691402,
|
| 9224 |
+
"grad_norm": 1.1204633712768555,
|
| 9225 |
+
"learning_rate": 2.3700283574492837e-06,
|
| 9226 |
+
"loss": 0.1093,
|
| 9227 |
+
"step": 13110
|
| 9228 |
+
},
|
| 9229 |
+
{
|
| 9230 |
+
"epoch": 5.151158225363172,
|
| 9231 |
+
"grad_norm": 0.0012298759538680315,
|
| 9232 |
+
"learning_rate": 2.3591216461862866e-06,
|
| 9233 |
+
"loss": 0.6134,
|
| 9234 |
+
"step": 13120
|
| 9235 |
+
},
|
| 9236 |
+
{
|
| 9237 |
+
"epoch": 5.155084413034943,
|
| 9238 |
+
"grad_norm": 0.34278765320777893,
|
| 9239 |
+
"learning_rate": 2.348214934923289e-06,
|
| 9240 |
+
"loss": 0.1132,
|
| 9241 |
+
"step": 13130
|
| 9242 |
+
},
|
| 9243 |
+
{
|
| 9244 |
+
"epoch": 5.159010600706714,
|
| 9245 |
+
"grad_norm": 0.8987643718719482,
|
| 9246 |
+
"learning_rate": 2.337308223660292e-06,
|
| 9247 |
+
"loss": 0.0614,
|
| 9248 |
+
"step": 13140
|
| 9249 |
+
},
|
| 9250 |
+
{
|
| 9251 |
+
"epoch": 5.1629367883784845,
|
| 9252 |
+
"grad_norm": 236.4158935546875,
|
| 9253 |
+
"learning_rate": 2.3264015123972956e-06,
|
| 9254 |
+
"loss": 0.2959,
|
| 9255 |
+
"step": 13150
|
| 9256 |
+
},
|
| 9257 |
+
{
|
| 9258 |
+
"epoch": 5.166862976050255,
|
| 9259 |
+
"grad_norm": 0.5242019891738892,
|
| 9260 |
+
"learning_rate": 2.315494801134298e-06,
|
| 9261 |
+
"loss": 0.1277,
|
| 9262 |
+
"step": 13160
|
| 9263 |
+
},
|
| 9264 |
+
{
|
| 9265 |
+
"epoch": 5.170789163722026,
|
| 9266 |
+
"grad_norm": 3.4700469970703125,
|
| 9267 |
+
"learning_rate": 2.304588089871301e-06,
|
| 9268 |
+
"loss": 0.2921,
|
| 9269 |
+
"step": 13170
|
| 9270 |
+
},
|
| 9271 |
+
{
|
| 9272 |
+
"epoch": 5.174715351393797,
|
| 9273 |
+
"grad_norm": 0.07389583438634872,
|
| 9274 |
+
"learning_rate": 2.2936813786083037e-06,
|
| 9275 |
+
"loss": 0.1182,
|
| 9276 |
+
"step": 13180
|
| 9277 |
+
},
|
| 9278 |
+
{
|
| 9279 |
+
"epoch": 5.178641539065567,
|
| 9280 |
+
"grad_norm": 0.9954978823661804,
|
| 9281 |
+
"learning_rate": 2.2827746673453066e-06,
|
| 9282 |
+
"loss": 0.1724,
|
| 9283 |
+
"step": 13190
|
| 9284 |
+
},
|
| 9285 |
+
{
|
| 9286 |
+
"epoch": 5.1825677267373385,
|
| 9287 |
+
"grad_norm": 1.2880812883377075,
|
| 9288 |
+
"learning_rate": 2.271867956082309e-06,
|
| 9289 |
+
"loss": 0.1925,
|
| 9290 |
+
"step": 13200
|
| 9291 |
+
},
|
| 9292 |
+
{
|
| 9293 |
+
"epoch": 5.186493914409109,
|
| 9294 |
+
"grad_norm": 0.2947271764278412,
|
| 9295 |
+
"learning_rate": 2.260961244819312e-06,
|
| 9296 |
+
"loss": 0.0891,
|
| 9297 |
+
"step": 13210
|
| 9298 |
+
},
|
| 9299 |
+
{
|
| 9300 |
+
"epoch": 5.190420102080879,
|
| 9301 |
+
"grad_norm": 1.8062124252319336,
|
| 9302 |
+
"learning_rate": 2.250054533556315e-06,
|
| 9303 |
+
"loss": 0.1711,
|
| 9304 |
+
"step": 13220
|
| 9305 |
+
},
|
| 9306 |
+
{
|
| 9307 |
+
"epoch": 5.19434628975265,
|
| 9308 |
+
"grad_norm": 1.5765048265457153,
|
| 9309 |
+
"learning_rate": 2.239147822293318e-06,
|
| 9310 |
+
"loss": 0.2276,
|
| 9311 |
+
"step": 13230
|
| 9312 |
+
},
|
| 9313 |
+
{
|
| 9314 |
+
"epoch": 5.198272477424421,
|
| 9315 |
+
"grad_norm": 36.99755859375,
|
| 9316 |
+
"learning_rate": 2.2282411110303207e-06,
|
| 9317 |
+
"loss": 0.0268,
|
| 9318 |
+
"step": 13240
|
| 9319 |
+
},
|
| 9320 |
+
{
|
| 9321 |
+
"epoch": 5.2021986650961916,
|
| 9322 |
+
"grad_norm": 0.04058028385043144,
|
| 9323 |
+
"learning_rate": 2.2173343997673236e-06,
|
| 9324 |
+
"loss": 0.0637,
|
| 9325 |
+
"step": 13250
|
| 9326 |
+
},
|
| 9327 |
+
{
|
| 9328 |
+
"epoch": 5.206124852767962,
|
| 9329 |
+
"grad_norm": 0.8288754820823669,
|
| 9330 |
+
"learning_rate": 2.206427688504326e-06,
|
| 9331 |
+
"loss": 0.3138,
|
| 9332 |
+
"step": 13260
|
| 9333 |
+
},
|
| 9334 |
+
{
|
| 9335 |
+
"epoch": 5.210051040439733,
|
| 9336 |
+
"grad_norm": 19.756397247314453,
|
| 9337 |
+
"learning_rate": 2.195520977241329e-06,
|
| 9338 |
+
"loss": 0.0867,
|
| 9339 |
+
"step": 13270
|
| 9340 |
+
},
|
| 9341 |
+
{
|
| 9342 |
+
"epoch": 5.213977228111504,
|
| 9343 |
+
"grad_norm": 0.07695566117763519,
|
| 9344 |
+
"learning_rate": 2.1846142659783317e-06,
|
| 9345 |
+
"loss": 0.021,
|
| 9346 |
+
"step": 13280
|
| 9347 |
+
},
|
| 9348 |
+
{
|
| 9349 |
+
"epoch": 5.217903415783274,
|
| 9350 |
+
"grad_norm": 0.0034018950536847115,
|
| 9351 |
+
"learning_rate": 2.1737075547153347e-06,
|
| 9352 |
+
"loss": 0.0148,
|
| 9353 |
+
"step": 13290
|
| 9354 |
+
},
|
| 9355 |
+
{
|
| 9356 |
+
"epoch": 5.2218296034550455,
|
| 9357 |
+
"grad_norm": 0.2699490487575531,
|
| 9358 |
+
"learning_rate": 2.162800843452338e-06,
|
| 9359 |
+
"loss": 0.1326,
|
| 9360 |
+
"step": 13300
|
| 9361 |
+
},
|
| 9362 |
+
{
|
| 9363 |
+
"epoch": 5.225755791126816,
|
| 9364 |
+
"grad_norm": 0.3341562747955322,
|
| 9365 |
+
"learning_rate": 2.1518941321893407e-06,
|
| 9366 |
+
"loss": 0.0725,
|
| 9367 |
+
"step": 13310
|
| 9368 |
+
},
|
| 9369 |
+
{
|
| 9370 |
+
"epoch": 5.229681978798586,
|
| 9371 |
+
"grad_norm": 1.3519041538238525,
|
| 9372 |
+
"learning_rate": 2.1409874209263436e-06,
|
| 9373 |
+
"loss": 0.4665,
|
| 9374 |
+
"step": 13320
|
| 9375 |
+
},
|
| 9376 |
+
{
|
| 9377 |
+
"epoch": 5.233608166470357,
|
| 9378 |
+
"grad_norm": 1.233429193496704,
|
| 9379 |
+
"learning_rate": 2.130080709663346e-06,
|
| 9380 |
+
"loss": 0.0432,
|
| 9381 |
+
"step": 13330
|
| 9382 |
+
},
|
| 9383 |
+
{
|
| 9384 |
+
"epoch": 5.237534354142128,
|
| 9385 |
+
"grad_norm": 0.17115157842636108,
|
| 9386 |
+
"learning_rate": 2.119173998400349e-06,
|
| 9387 |
+
"loss": 0.0434,
|
| 9388 |
+
"step": 13340
|
| 9389 |
+
},
|
| 9390 |
+
{
|
| 9391 |
+
"epoch": 5.241460541813899,
|
| 9392 |
+
"grad_norm": 0.15809869766235352,
|
| 9393 |
+
"learning_rate": 2.1082672871373517e-06,
|
| 9394 |
+
"loss": 0.0168,
|
| 9395 |
+
"step": 13350
|
| 9396 |
+
},
|
| 9397 |
+
{
|
| 9398 |
+
"epoch": 5.245386729485669,
|
| 9399 |
+
"grad_norm": 0.15708252787590027,
|
| 9400 |
+
"learning_rate": 2.0973605758743547e-06,
|
| 9401 |
+
"loss": 0.1109,
|
| 9402 |
+
"step": 13360
|
| 9403 |
+
},
|
| 9404 |
+
{
|
| 9405 |
+
"epoch": 5.24931291715744,
|
| 9406 |
+
"grad_norm": 0.01780679263174534,
|
| 9407 |
+
"learning_rate": 2.0864538646113577e-06,
|
| 9408 |
+
"loss": 0.2882,
|
| 9409 |
+
"step": 13370
|
| 9410 |
+
},
|
| 9411 |
+
{
|
| 9412 |
+
"epoch": 5.253239104829211,
|
| 9413 |
+
"grad_norm": 0.4691922068595886,
|
| 9414 |
+
"learning_rate": 2.0755471533483606e-06,
|
| 9415 |
+
"loss": 0.0458,
|
| 9416 |
+
"step": 13380
|
| 9417 |
+
},
|
| 9418 |
+
{
|
| 9419 |
+
"epoch": 5.257165292500981,
|
| 9420 |
+
"grad_norm": 3.2137491703033447,
|
| 9421 |
+
"learning_rate": 2.064640442085363e-06,
|
| 9422 |
+
"loss": 0.075,
|
| 9423 |
+
"step": 13390
|
| 9424 |
+
},
|
| 9425 |
+
{
|
| 9426 |
+
"epoch": 5.261091480172753,
|
| 9427 |
+
"grad_norm": 0.05457557737827301,
|
| 9428 |
+
"learning_rate": 2.053733730822366e-06,
|
| 9429 |
+
"loss": 0.0244,
|
| 9430 |
+
"step": 13400
|
| 9431 |
+
},
|
| 9432 |
+
{
|
| 9433 |
+
"epoch": 5.265017667844523,
|
| 9434 |
+
"grad_norm": 3.349138021469116,
|
| 9435 |
+
"learning_rate": 2.0428270195593687e-06,
|
| 9436 |
+
"loss": 0.0189,
|
| 9437 |
+
"step": 13410
|
| 9438 |
+
},
|
| 9439 |
+
{
|
| 9440 |
+
"epoch": 5.2689438555162935,
|
| 9441 |
+
"grad_norm": 0.4423637092113495,
|
| 9442 |
+
"learning_rate": 2.0319203082963717e-06,
|
| 9443 |
+
"loss": 0.0515,
|
| 9444 |
+
"step": 13420
|
| 9445 |
+
},
|
| 9446 |
+
{
|
| 9447 |
+
"epoch": 5.272870043188064,
|
| 9448 |
+
"grad_norm": 0.2601698637008667,
|
| 9449 |
+
"learning_rate": 2.0210135970333743e-06,
|
| 9450 |
+
"loss": 0.0444,
|
| 9451 |
+
"step": 13430
|
| 9452 |
+
},
|
| 9453 |
+
{
|
| 9454 |
+
"epoch": 5.276796230859835,
|
| 9455 |
+
"grad_norm": 2.402855396270752,
|
| 9456 |
+
"learning_rate": 2.0101068857703777e-06,
|
| 9457 |
+
"loss": 0.2045,
|
| 9458 |
+
"step": 13440
|
| 9459 |
+
},
|
| 9460 |
+
{
|
| 9461 |
+
"epoch": 5.280722418531606,
|
| 9462 |
+
"grad_norm": 3.287405490875244,
|
| 9463 |
+
"learning_rate": 1.99920017450738e-06,
|
| 9464 |
+
"loss": 0.0396,
|
| 9465 |
+
"step": 13450
|
| 9466 |
+
},
|
| 9467 |
+
{
|
| 9468 |
+
"epoch": 5.284648606203376,
|
| 9469 |
+
"grad_norm": 1.1102228164672852,
|
| 9470 |
+
"learning_rate": 1.988293463244383e-06,
|
| 9471 |
+
"loss": 0.3582,
|
| 9472 |
+
"step": 13460
|
| 9473 |
+
},
|
| 9474 |
+
{
|
| 9475 |
+
"epoch": 5.2885747938751475,
|
| 9476 |
+
"grad_norm": 0.3136877119541168,
|
| 9477 |
+
"learning_rate": 1.977386751981386e-06,
|
| 9478 |
+
"loss": 0.1289,
|
| 9479 |
+
"step": 13470
|
| 9480 |
+
},
|
| 9481 |
+
{
|
| 9482 |
+
"epoch": 5.292500981546918,
|
| 9483 |
+
"grad_norm": 0.09412948787212372,
|
| 9484 |
+
"learning_rate": 1.9664800407183887e-06,
|
| 9485 |
+
"loss": 0.0135,
|
| 9486 |
+
"step": 13480
|
| 9487 |
+
},
|
| 9488 |
+
{
|
| 9489 |
+
"epoch": 5.296427169218688,
|
| 9490 |
+
"grad_norm": 70.45732116699219,
|
| 9491 |
+
"learning_rate": 1.9555733294553917e-06,
|
| 9492 |
+
"loss": 0.437,
|
| 9493 |
+
"step": 13490
|
| 9494 |
+
},
|
| 9495 |
+
{
|
| 9496 |
+
"epoch": 5.30035335689046,
|
| 9497 |
+
"grad_norm": 13.51106071472168,
|
| 9498 |
+
"learning_rate": 1.9446666181923942e-06,
|
| 9499 |
+
"loss": 0.1514,
|
| 9500 |
+
"step": 13500
|
| 9501 |
+
},
|
| 9502 |
+
{
|
| 9503 |
+
"epoch": 5.30427954456223,
|
| 9504 |
+
"grad_norm": 0.05329679697751999,
|
| 9505 |
+
"learning_rate": 1.9337599069293972e-06,
|
| 9506 |
+
"loss": 0.2202,
|
| 9507 |
+
"step": 13510
|
| 9508 |
+
},
|
| 9509 |
+
{
|
| 9510 |
+
"epoch": 5.308205732234001,
|
| 9511 |
+
"grad_norm": 0.18707284331321716,
|
| 9512 |
+
"learning_rate": 1.9228531956664e-06,
|
| 9513 |
+
"loss": 0.0637,
|
| 9514 |
+
"step": 13520
|
| 9515 |
+
},
|
| 9516 |
+
{
|
| 9517 |
+
"epoch": 5.312131919905772,
|
| 9518 |
+
"grad_norm": 22.817306518554688,
|
| 9519 |
+
"learning_rate": 1.911946484403403e-06,
|
| 9520 |
+
"loss": 0.217,
|
| 9521 |
+
"step": 13530
|
| 9522 |
+
},
|
| 9523 |
+
{
|
| 9524 |
+
"epoch": 5.316058107577542,
|
| 9525 |
+
"grad_norm": 0.5910695791244507,
|
| 9526 |
+
"learning_rate": 1.901039773140406e-06,
|
| 9527 |
+
"loss": 0.1745,
|
| 9528 |
+
"step": 13540
|
| 9529 |
+
},
|
| 9530 |
+
{
|
| 9531 |
+
"epoch": 5.319984295249313,
|
| 9532 |
+
"grad_norm": 0.7026278972625732,
|
| 9533 |
+
"learning_rate": 1.8901330618774087e-06,
|
| 9534 |
+
"loss": 0.0093,
|
| 9535 |
+
"step": 13550
|
| 9536 |
+
},
|
| 9537 |
+
{
|
| 9538 |
+
"epoch": 5.323910482921083,
|
| 9539 |
+
"grad_norm": 1.2804770469665527,
|
| 9540 |
+
"learning_rate": 1.8792263506144115e-06,
|
| 9541 |
+
"loss": 0.1142,
|
| 9542 |
+
"step": 13560
|
| 9543 |
+
},
|
| 9544 |
+
{
|
| 9545 |
+
"epoch": 5.327836670592855,
|
| 9546 |
+
"grad_norm": 0.17652656137943268,
|
| 9547 |
+
"learning_rate": 1.8683196393514144e-06,
|
| 9548 |
+
"loss": 0.1111,
|
| 9549 |
+
"step": 13570
|
| 9550 |
+
},
|
| 9551 |
+
{
|
| 9552 |
+
"epoch": 5.331762858264625,
|
| 9553 |
+
"grad_norm": 31.41547393798828,
|
| 9554 |
+
"learning_rate": 1.8574129280884172e-06,
|
| 9555 |
+
"loss": 0.2906,
|
| 9556 |
+
"step": 13580
|
| 9557 |
+
},
|
| 9558 |
+
{
|
| 9559 |
+
"epoch": 5.3356890459363955,
|
| 9560 |
+
"grad_norm": 0.23575662076473236,
|
| 9561 |
+
"learning_rate": 1.84650621682542e-06,
|
| 9562 |
+
"loss": 0.2445,
|
| 9563 |
+
"step": 13590
|
| 9564 |
+
},
|
| 9565 |
+
{
|
| 9566 |
+
"epoch": 5.339615233608167,
|
| 9567 |
+
"grad_norm": 0.32999148964881897,
|
| 9568 |
+
"learning_rate": 1.8355995055624227e-06,
|
| 9569 |
+
"loss": 0.1179,
|
| 9570 |
+
"step": 13600
|
| 9571 |
+
},
|
| 9572 |
+
{
|
| 9573 |
+
"epoch": 5.343541421279937,
|
| 9574 |
+
"grad_norm": 0.01785041019320488,
|
| 9575 |
+
"learning_rate": 1.8246927942994257e-06,
|
| 9576 |
+
"loss": 0.0075,
|
| 9577 |
+
"step": 13610
|
| 9578 |
+
},
|
| 9579 |
+
{
|
| 9580 |
+
"epoch": 5.347467608951708,
|
| 9581 |
+
"grad_norm": 2.5955681800842285,
|
| 9582 |
+
"learning_rate": 1.8137860830364285e-06,
|
| 9583 |
+
"loss": 0.1413,
|
| 9584 |
+
"step": 13620
|
| 9585 |
+
},
|
| 9586 |
+
{
|
| 9587 |
+
"epoch": 5.351393796623478,
|
| 9588 |
+
"grad_norm": 29.785816192626953,
|
| 9589 |
+
"learning_rate": 1.8028793717734312e-06,
|
| 9590 |
+
"loss": 0.0787,
|
| 9591 |
+
"step": 13630
|
| 9592 |
+
},
|
| 9593 |
+
{
|
| 9594 |
+
"epoch": 5.3553199842952495,
|
| 9595 |
+
"grad_norm": 0.041280996054410934,
|
| 9596 |
+
"learning_rate": 1.791972660510434e-06,
|
| 9597 |
+
"loss": 0.0299,
|
| 9598 |
+
"step": 13640
|
| 9599 |
+
},
|
| 9600 |
+
{
|
| 9601 |
+
"epoch": 5.35924617196702,
|
| 9602 |
+
"grad_norm": 2.446070432662964,
|
| 9603 |
+
"learning_rate": 1.781065949247437e-06,
|
| 9604 |
+
"loss": 0.2202,
|
| 9605 |
+
"step": 13650
|
| 9606 |
+
},
|
| 9607 |
+
{
|
| 9608 |
+
"epoch": 5.36317235963879,
|
| 9609 |
+
"grad_norm": 0.14916792511940002,
|
| 9610 |
+
"learning_rate": 1.7701592379844397e-06,
|
| 9611 |
+
"loss": 0.0731,
|
| 9612 |
+
"step": 13660
|
| 9613 |
+
},
|
| 9614 |
+
{
|
| 9615 |
+
"epoch": 5.367098547310562,
|
| 9616 |
+
"grad_norm": 78.42562866210938,
|
| 9617 |
+
"learning_rate": 1.7592525267214425e-06,
|
| 9618 |
+
"loss": 0.2163,
|
| 9619 |
+
"step": 13670
|
| 9620 |
+
},
|
| 9621 |
+
{
|
| 9622 |
+
"epoch": 5.371024734982332,
|
| 9623 |
+
"grad_norm": 69.78839874267578,
|
| 9624 |
+
"learning_rate": 1.7483458154584455e-06,
|
| 9625 |
+
"loss": 0.2863,
|
| 9626 |
+
"step": 13680
|
| 9627 |
+
},
|
| 9628 |
+
{
|
| 9629 |
+
"epoch": 5.374950922654103,
|
| 9630 |
+
"grad_norm": 0.006996185053139925,
|
| 9631 |
+
"learning_rate": 1.7374391041954483e-06,
|
| 9632 |
+
"loss": 0.0782,
|
| 9633 |
+
"step": 13690
|
| 9634 |
+
},
|
| 9635 |
+
{
|
| 9636 |
+
"epoch": 5.378877110325874,
|
| 9637 |
+
"grad_norm": 113.34099578857422,
|
| 9638 |
+
"learning_rate": 1.7265323929324512e-06,
|
| 9639 |
+
"loss": 0.2028,
|
| 9640 |
+
"step": 13700
|
| 9641 |
+
},
|
| 9642 |
+
{
|
| 9643 |
+
"epoch": 5.382803297997644,
|
| 9644 |
+
"grad_norm": 12.9867582321167,
|
| 9645 |
+
"learning_rate": 1.715625681669454e-06,
|
| 9646 |
+
"loss": 0.2298,
|
| 9647 |
+
"step": 13710
|
| 9648 |
+
},
|
| 9649 |
+
{
|
| 9650 |
+
"epoch": 5.386729485669415,
|
| 9651 |
+
"grad_norm": 1.6531518697738647,
|
| 9652 |
+
"learning_rate": 1.704718970406457e-06,
|
| 9653 |
+
"loss": 0.0999,
|
| 9654 |
+
"step": 13720
|
| 9655 |
+
},
|
| 9656 |
+
{
|
| 9657 |
+
"epoch": 5.390655673341186,
|
| 9658 |
+
"grad_norm": 18.483884811401367,
|
| 9659 |
+
"learning_rate": 1.6938122591434597e-06,
|
| 9660 |
+
"loss": 0.0566,
|
| 9661 |
+
"step": 13730
|
| 9662 |
+
},
|
| 9663 |
+
{
|
| 9664 |
+
"epoch": 5.394581861012957,
|
| 9665 |
+
"grad_norm": 0.0013142261886969209,
|
| 9666 |
+
"learning_rate": 1.6829055478804625e-06,
|
| 9667 |
+
"loss": 0.0257,
|
| 9668 |
+
"step": 13740
|
| 9669 |
+
},
|
| 9670 |
+
{
|
| 9671 |
+
"epoch": 5.398508048684727,
|
| 9672 |
+
"grad_norm": 1.1040148735046387,
|
| 9673 |
+
"learning_rate": 1.6719988366174653e-06,
|
| 9674 |
+
"loss": 0.1075,
|
| 9675 |
+
"step": 13750
|
| 9676 |
+
},
|
| 9677 |
+
{
|
| 9678 |
+
"epoch": 5.402434236356497,
|
| 9679 |
+
"grad_norm": 0.766169011592865,
|
| 9680 |
+
"learning_rate": 1.6610921253544682e-06,
|
| 9681 |
+
"loss": 0.0146,
|
| 9682 |
+
"step": 13760
|
| 9683 |
+
},
|
| 9684 |
+
{
|
| 9685 |
+
"epoch": 5.406360424028269,
|
| 9686 |
+
"grad_norm": 0.00455129100009799,
|
| 9687 |
+
"learning_rate": 1.650185414091471e-06,
|
| 9688 |
+
"loss": 0.191,
|
| 9689 |
+
"step": 13770
|
| 9690 |
+
},
|
| 9691 |
+
{
|
| 9692 |
+
"epoch": 5.410286611700039,
|
| 9693 |
+
"grad_norm": 0.04159601777791977,
|
| 9694 |
+
"learning_rate": 1.6392787028284738e-06,
|
| 9695 |
+
"loss": 0.081,
|
| 9696 |
+
"step": 13780
|
| 9697 |
+
},
|
| 9698 |
+
{
|
| 9699 |
+
"epoch": 5.41421279937181,
|
| 9700 |
+
"grad_norm": 0.2454538196325302,
|
| 9701 |
+
"learning_rate": 1.6283719915654767e-06,
|
| 9702 |
+
"loss": 0.1503,
|
| 9703 |
+
"step": 13790
|
| 9704 |
+
},
|
| 9705 |
+
{
|
| 9706 |
+
"epoch": 5.418138987043581,
|
| 9707 |
+
"grad_norm": 3.542814016342163,
|
| 9708 |
+
"learning_rate": 1.6174652803024795e-06,
|
| 9709 |
+
"loss": 0.0179,
|
| 9710 |
+
"step": 13800
|
| 9711 |
+
},
|
| 9712 |
+
{
|
| 9713 |
+
"epoch": 5.422065174715351,
|
| 9714 |
+
"grad_norm": 0.015254409052431583,
|
| 9715 |
+
"learning_rate": 1.6065585690394823e-06,
|
| 9716 |
+
"loss": 0.0264,
|
| 9717 |
+
"step": 13810
|
| 9718 |
+
},
|
| 9719 |
+
{
|
| 9720 |
+
"epoch": 5.425991362387122,
|
| 9721 |
+
"grad_norm": 1.0630416870117188,
|
| 9722 |
+
"learning_rate": 1.595651857776485e-06,
|
| 9723 |
+
"loss": 0.212,
|
| 9724 |
+
"step": 13820
|
| 9725 |
+
},
|
| 9726 |
+
{
|
| 9727 |
+
"epoch": 5.429917550058893,
|
| 9728 |
+
"grad_norm": 0.8215121030807495,
|
| 9729 |
+
"learning_rate": 1.584745146513488e-06,
|
| 9730 |
+
"loss": 0.0404,
|
| 9731 |
+
"step": 13830
|
| 9732 |
+
},
|
| 9733 |
+
{
|
| 9734 |
+
"epoch": 5.433843737730664,
|
| 9735 |
+
"grad_norm": 2.257424831390381,
|
| 9736 |
+
"learning_rate": 1.5738384352504908e-06,
|
| 9737 |
+
"loss": 0.1186,
|
| 9738 |
+
"step": 13840
|
| 9739 |
+
},
|
| 9740 |
+
{
|
| 9741 |
+
"epoch": 5.437769925402434,
|
| 9742 |
+
"grad_norm": 0.004584351554512978,
|
| 9743 |
+
"learning_rate": 1.5629317239874935e-06,
|
| 9744 |
+
"loss": 0.3232,
|
| 9745 |
+
"step": 13850
|
| 9746 |
+
},
|
| 9747 |
+
{
|
| 9748 |
+
"epoch": 5.4416961130742045,
|
| 9749 |
+
"grad_norm": 0.18819616734981537,
|
| 9750 |
+
"learning_rate": 1.5520250127244965e-06,
|
| 9751 |
+
"loss": 0.0491,
|
| 9752 |
+
"step": 13860
|
| 9753 |
+
},
|
| 9754 |
+
{
|
| 9755 |
+
"epoch": 5.445622300745976,
|
| 9756 |
+
"grad_norm": 52.796051025390625,
|
| 9757 |
+
"learning_rate": 1.5411183014614995e-06,
|
| 9758 |
+
"loss": 0.1477,
|
| 9759 |
+
"step": 13870
|
| 9760 |
+
},
|
| 9761 |
+
{
|
| 9762 |
+
"epoch": 5.449548488417746,
|
| 9763 |
+
"grad_norm": 0.038888316601514816,
|
| 9764 |
+
"learning_rate": 1.5302115901985023e-06,
|
| 9765 |
+
"loss": 0.1332,
|
| 9766 |
+
"step": 13880
|
| 9767 |
+
},
|
| 9768 |
+
{
|
| 9769 |
+
"epoch": 5.453474676089517,
|
| 9770 |
+
"grad_norm": 25.012880325317383,
|
| 9771 |
+
"learning_rate": 1.519304878935505e-06,
|
| 9772 |
+
"loss": 0.0229,
|
| 9773 |
+
"step": 13890
|
| 9774 |
+
},
|
| 9775 |
+
{
|
| 9776 |
+
"epoch": 5.457400863761288,
|
| 9777 |
+
"grad_norm": 50.476722717285156,
|
| 9778 |
+
"learning_rate": 1.5083981676725078e-06,
|
| 9779 |
+
"loss": 0.2821,
|
| 9780 |
+
"step": 13900
|
| 9781 |
+
},
|
| 9782 |
+
{
|
| 9783 |
+
"epoch": 5.4613270514330585,
|
| 9784 |
+
"grad_norm": 0.554529070854187,
|
| 9785 |
+
"learning_rate": 1.4974914564095108e-06,
|
| 9786 |
+
"loss": 0.1492,
|
| 9787 |
+
"step": 13910
|
| 9788 |
+
},
|
| 9789 |
+
{
|
| 9790 |
+
"epoch": 5.465253239104829,
|
| 9791 |
+
"grad_norm": 39.14360046386719,
|
| 9792 |
+
"learning_rate": 1.4865847451465135e-06,
|
| 9793 |
+
"loss": 0.0737,
|
| 9794 |
+
"step": 13920
|
| 9795 |
+
},
|
| 9796 |
+
{
|
| 9797 |
+
"epoch": 5.4691794267766,
|
| 9798 |
+
"grad_norm": 27.91413688659668,
|
| 9799 |
+
"learning_rate": 1.4756780338835163e-06,
|
| 9800 |
+
"loss": 0.1312,
|
| 9801 |
+
"step": 13930
|
| 9802 |
+
},
|
| 9803 |
+
{
|
| 9804 |
+
"epoch": 5.473105614448371,
|
| 9805 |
+
"grad_norm": 4.997786521911621,
|
| 9806 |
+
"learning_rate": 1.4647713226205193e-06,
|
| 9807 |
+
"loss": 0.1063,
|
| 9808 |
+
"step": 13940
|
| 9809 |
+
},
|
| 9810 |
+
{
|
| 9811 |
+
"epoch": 5.477031802120141,
|
| 9812 |
+
"grad_norm": 3.62794828414917,
|
| 9813 |
+
"learning_rate": 1.453864611357522e-06,
|
| 9814 |
+
"loss": 0.1987,
|
| 9815 |
+
"step": 13950
|
| 9816 |
+
},
|
| 9817 |
+
{
|
| 9818 |
+
"epoch": 5.4809579897919125,
|
| 9819 |
+
"grad_norm": 0.12887300550937653,
|
| 9820 |
+
"learning_rate": 1.4429579000945248e-06,
|
| 9821 |
+
"loss": 0.0639,
|
| 9822 |
+
"step": 13960
|
| 9823 |
+
},
|
| 9824 |
+
{
|
| 9825 |
+
"epoch": 5.484884177463683,
|
| 9826 |
+
"grad_norm": 34.32918930053711,
|
| 9827 |
+
"learning_rate": 1.4320511888315276e-06,
|
| 9828 |
+
"loss": 0.1747,
|
| 9829 |
+
"step": 13970
|
| 9830 |
+
},
|
| 9831 |
+
{
|
| 9832 |
+
"epoch": 5.488810365135453,
|
| 9833 |
+
"grad_norm": 50.748191833496094,
|
| 9834 |
+
"learning_rate": 1.4211444775685305e-06,
|
| 9835 |
+
"loss": 0.2842,
|
| 9836 |
+
"step": 13980
|
| 9837 |
+
},
|
| 9838 |
+
{
|
| 9839 |
+
"epoch": 5.492736552807224,
|
| 9840 |
+
"grad_norm": 0.0012191717978566885,
|
| 9841 |
+
"learning_rate": 1.4102377663055333e-06,
|
| 9842 |
+
"loss": 0.3211,
|
| 9843 |
+
"step": 13990
|
| 9844 |
+
},
|
| 9845 |
+
{
|
| 9846 |
+
"epoch": 5.496662740478995,
|
| 9847 |
+
"grad_norm": 0.11494173109531403,
|
| 9848 |
+
"learning_rate": 1.399331055042536e-06,
|
| 9849 |
+
"loss": 0.0201,
|
| 9850 |
+
"step": 14000
|
| 9851 |
+
},
|
| 9852 |
+
{
|
| 9853 |
+
"epoch": 5.500588928150766,
|
| 9854 |
+
"grad_norm": 0.022875521332025528,
|
| 9855 |
+
"learning_rate": 1.388424343779539e-06,
|
| 9856 |
+
"loss": 0.2013,
|
| 9857 |
+
"step": 14010
|
| 9858 |
+
},
|
| 9859 |
+
{
|
| 9860 |
+
"epoch": 5.504515115822536,
|
| 9861 |
+
"grad_norm": 0.012128263711929321,
|
| 9862 |
+
"learning_rate": 1.377517632516542e-06,
|
| 9863 |
+
"loss": 0.0205,
|
| 9864 |
+
"step": 14020
|
| 9865 |
+
},
|
| 9866 |
+
{
|
| 9867 |
+
"epoch": 5.508441303494307,
|
| 9868 |
+
"grad_norm": 0.1611277312040329,
|
| 9869 |
+
"learning_rate": 1.3666109212535448e-06,
|
| 9870 |
+
"loss": 0.03,
|
| 9871 |
+
"step": 14030
|
| 9872 |
+
},
|
| 9873 |
+
{
|
| 9874 |
+
"epoch": 5.512367491166078,
|
| 9875 |
+
"grad_norm": 2.261368751525879,
|
| 9876 |
+
"learning_rate": 1.3557042099905476e-06,
|
| 9877 |
+
"loss": 0.0399,
|
| 9878 |
+
"step": 14040
|
| 9879 |
+
},
|
| 9880 |
+
{
|
| 9881 |
+
"epoch": 5.516293678837848,
|
| 9882 |
+
"grad_norm": 33.47151184082031,
|
| 9883 |
+
"learning_rate": 1.3447974987275505e-06,
|
| 9884 |
+
"loss": 0.0367,
|
| 9885 |
+
"step": 14050
|
| 9886 |
+
},
|
| 9887 |
+
{
|
| 9888 |
+
"epoch": 5.520219866509619,
|
| 9889 |
+
"grad_norm": 0.10368947684764862,
|
| 9890 |
+
"learning_rate": 1.3338907874645533e-06,
|
| 9891 |
+
"loss": 0.0327,
|
| 9892 |
+
"step": 14060
|
| 9893 |
+
},
|
| 9894 |
+
{
|
| 9895 |
+
"epoch": 5.52414605418139,
|
| 9896 |
+
"grad_norm": 4.692790985107422,
|
| 9897 |
+
"learning_rate": 1.322984076201556e-06,
|
| 9898 |
+
"loss": 0.1582,
|
| 9899 |
+
"step": 14070
|
| 9900 |
+
},
|
| 9901 |
+
{
|
| 9902 |
+
"epoch": 5.5280722418531605,
|
| 9903 |
+
"grad_norm": 0.893932044506073,
|
| 9904 |
+
"learning_rate": 1.3120773649385588e-06,
|
| 9905 |
+
"loss": 0.0526,
|
| 9906 |
+
"step": 14080
|
| 9907 |
+
},
|
| 9908 |
+
{
|
| 9909 |
+
"epoch": 5.531998429524931,
|
| 9910 |
+
"grad_norm": 5.193930625915527,
|
| 9911 |
+
"learning_rate": 1.3011706536755618e-06,
|
| 9912 |
+
"loss": 0.0851,
|
| 9913 |
+
"step": 14090
|
| 9914 |
+
},
|
| 9915 |
+
{
|
| 9916 |
+
"epoch": 5.535924617196702,
|
| 9917 |
+
"grad_norm": 1.462369441986084,
|
| 9918 |
+
"learning_rate": 1.2902639424125646e-06,
|
| 9919 |
+
"loss": 0.1355,
|
| 9920 |
+
"step": 14100
|
| 9921 |
+
},
|
| 9922 |
+
{
|
| 9923 |
+
"epoch": 5.539850804868473,
|
| 9924 |
+
"grad_norm": 0.06339607387781143,
|
| 9925 |
+
"learning_rate": 1.2793572311495673e-06,
|
| 9926 |
+
"loss": 0.1839,
|
| 9927 |
+
"step": 14110
|
| 9928 |
+
},
|
| 9929 |
+
{
|
| 9930 |
+
"epoch": 5.543776992540243,
|
| 9931 |
+
"grad_norm": 0.01694520190358162,
|
| 9932 |
+
"learning_rate": 1.26845051988657e-06,
|
| 9933 |
+
"loss": 0.1235,
|
| 9934 |
+
"step": 14120
|
| 9935 |
+
},
|
| 9936 |
+
{
|
| 9937 |
+
"epoch": 5.5477031802120145,
|
| 9938 |
+
"grad_norm": 0.2765995264053345,
|
| 9939 |
+
"learning_rate": 1.257543808623573e-06,
|
| 9940 |
+
"loss": 0.3504,
|
| 9941 |
+
"step": 14130
|
| 9942 |
+
},
|
| 9943 |
+
{
|
| 9944 |
+
"epoch": 5.551629367883785,
|
| 9945 |
+
"grad_norm": 0.2754051685333252,
|
| 9946 |
+
"learning_rate": 1.2466370973605758e-06,
|
| 9947 |
+
"loss": 0.0768,
|
| 9948 |
+
"step": 14140
|
| 9949 |
+
},
|
| 9950 |
+
{
|
| 9951 |
+
"epoch": 5.555555555555555,
|
| 9952 |
+
"grad_norm": 0.051648326218128204,
|
| 9953 |
+
"learning_rate": 1.2357303860975786e-06,
|
| 9954 |
+
"loss": 0.0335,
|
| 9955 |
+
"step": 14150
|
| 9956 |
+
},
|
| 9957 |
+
{
|
| 9958 |
+
"epoch": 5.559481743227327,
|
| 9959 |
+
"grad_norm": 0.07156907021999359,
|
| 9960 |
+
"learning_rate": 1.2248236748345816e-06,
|
| 9961 |
+
"loss": 0.0572,
|
| 9962 |
+
"step": 14160
|
| 9963 |
+
},
|
| 9964 |
+
{
|
| 9965 |
+
"epoch": 5.563407930899097,
|
| 9966 |
+
"grad_norm": 0.6992261409759521,
|
| 9967 |
+
"learning_rate": 1.2139169635715843e-06,
|
| 9968 |
+
"loss": 0.1361,
|
| 9969 |
+
"step": 14170
|
| 9970 |
+
},
|
| 9971 |
+
{
|
| 9972 |
+
"epoch": 5.567334118570868,
|
| 9973 |
+
"grad_norm": 2.310169219970703,
|
| 9974 |
+
"learning_rate": 1.2030102523085873e-06,
|
| 9975 |
+
"loss": 0.0061,
|
| 9976 |
+
"step": 14180
|
| 9977 |
+
},
|
| 9978 |
+
{
|
| 9979 |
+
"epoch": 5.571260306242638,
|
| 9980 |
+
"grad_norm": 1.5157902240753174,
|
| 9981 |
+
"learning_rate": 1.19210354104559e-06,
|
| 9982 |
+
"loss": 0.0122,
|
| 9983 |
+
"step": 14190
|
| 9984 |
+
},
|
| 9985 |
+
{
|
| 9986 |
+
"epoch": 5.575186493914409,
|
| 9987 |
+
"grad_norm": 29.78800392150879,
|
| 9988 |
+
"learning_rate": 1.181196829782593e-06,
|
| 9989 |
+
"loss": 0.1933,
|
| 9990 |
+
"step": 14200
|
| 9991 |
+
},
|
| 9992 |
+
{
|
| 9993 |
+
"epoch": 5.57911268158618,
|
| 9994 |
+
"grad_norm": 75.67921447753906,
|
| 9995 |
+
"learning_rate": 1.1702901185195958e-06,
|
| 9996 |
+
"loss": 0.2681,
|
| 9997 |
+
"step": 14210
|
| 9998 |
+
},
|
| 9999 |
+
{
|
| 10000 |
+
"epoch": 5.58303886925795,
|
| 10001 |
+
"grad_norm": 0.10809934139251709,
|
| 10002 |
+
"learning_rate": 1.1593834072565986e-06,
|
| 10003 |
+
"loss": 0.1424,
|
| 10004 |
+
"step": 14220
|
| 10005 |
+
},
|
| 10006 |
+
{
|
| 10007 |
+
"epoch": 5.586965056929722,
|
| 10008 |
+
"grad_norm": 0.010167748667299747,
|
| 10009 |
+
"learning_rate": 1.1484766959936014e-06,
|
| 10010 |
+
"loss": 0.1019,
|
| 10011 |
+
"step": 14230
|
| 10012 |
+
},
|
| 10013 |
+
{
|
| 10014 |
+
"epoch": 5.590891244601492,
|
| 10015 |
+
"grad_norm": 0.03884586691856384,
|
| 10016 |
+
"learning_rate": 1.1375699847306043e-06,
|
| 10017 |
+
"loss": 0.1516,
|
| 10018 |
+
"step": 14240
|
| 10019 |
+
},
|
| 10020 |
+
{
|
| 10021 |
+
"epoch": 5.5948174322732624,
|
| 10022 |
+
"grad_norm": 0.25132858753204346,
|
| 10023 |
+
"learning_rate": 1.126663273467607e-06,
|
| 10024 |
+
"loss": 0.0585,
|
| 10025 |
+
"step": 14250
|
| 10026 |
+
},
|
| 10027 |
+
{
|
| 10028 |
+
"epoch": 5.598743619945033,
|
| 10029 |
+
"grad_norm": 11.373438835144043,
|
| 10030 |
+
"learning_rate": 1.1157565622046099e-06,
|
| 10031 |
+
"loss": 0.1883,
|
| 10032 |
+
"step": 14260
|
| 10033 |
+
},
|
| 10034 |
+
{
|
| 10035 |
+
"epoch": 5.602669807616804,
|
| 10036 |
+
"grad_norm": 24.09672737121582,
|
| 10037 |
+
"learning_rate": 1.1048498509416128e-06,
|
| 10038 |
+
"loss": 0.1442,
|
| 10039 |
+
"step": 14270
|
| 10040 |
+
},
|
| 10041 |
+
{
|
| 10042 |
+
"epoch": 5.606595995288575,
|
| 10043 |
+
"grad_norm": 8.562515258789062,
|
| 10044 |
+
"learning_rate": 1.0939431396786156e-06,
|
| 10045 |
+
"loss": 0.2073,
|
| 10046 |
+
"step": 14280
|
| 10047 |
+
},
|
| 10048 |
+
{
|
| 10049 |
+
"epoch": 5.610522182960345,
|
| 10050 |
+
"grad_norm": 6.606204509735107,
|
| 10051 |
+
"learning_rate": 1.0830364284156184e-06,
|
| 10052 |
+
"loss": 0.0234,
|
| 10053 |
+
"step": 14290
|
| 10054 |
+
},
|
| 10055 |
+
{
|
| 10056 |
+
"epoch": 5.614448370632116,
|
| 10057 |
+
"grad_norm": 0.007998577319085598,
|
| 10058 |
+
"learning_rate": 1.0721297171526211e-06,
|
| 10059 |
+
"loss": 0.1604,
|
| 10060 |
+
"step": 14300
|
| 10061 |
+
},
|
| 10062 |
+
{
|
| 10063 |
+
"epoch": 5.618374558303887,
|
| 10064 |
+
"grad_norm": 5.682195663452148,
|
| 10065 |
+
"learning_rate": 1.0612230058896241e-06,
|
| 10066 |
+
"loss": 0.0596,
|
| 10067 |
+
"step": 14310
|
| 10068 |
+
},
|
| 10069 |
+
{
|
| 10070 |
+
"epoch": 5.622300745975657,
|
| 10071 |
+
"grad_norm": 20.193941116333008,
|
| 10072 |
+
"learning_rate": 1.0503162946266269e-06,
|
| 10073 |
+
"loss": 0.0647,
|
| 10074 |
+
"step": 14320
|
| 10075 |
+
},
|
| 10076 |
+
{
|
| 10077 |
+
"epoch": 5.626226933647429,
|
| 10078 |
+
"grad_norm": 0.019956370815634727,
|
| 10079 |
+
"learning_rate": 1.0394095833636299e-06,
|
| 10080 |
+
"loss": 0.0364,
|
| 10081 |
+
"step": 14330
|
| 10082 |
+
},
|
| 10083 |
+
{
|
| 10084 |
+
"epoch": 5.630153121319199,
|
| 10085 |
+
"grad_norm": 2.3317253589630127,
|
| 10086 |
+
"learning_rate": 1.0285028721006326e-06,
|
| 10087 |
+
"loss": 0.3789,
|
| 10088 |
+
"step": 14340
|
| 10089 |
+
},
|
| 10090 |
+
{
|
| 10091 |
+
"epoch": 5.6340793089909695,
|
| 10092 |
+
"grad_norm": 0.5026030540466309,
|
| 10093 |
+
"learning_rate": 1.0175961608376356e-06,
|
| 10094 |
+
"loss": 0.025,
|
| 10095 |
+
"step": 14350
|
| 10096 |
+
},
|
| 10097 |
+
{
|
| 10098 |
+
"epoch": 5.638005496662741,
|
| 10099 |
+
"grad_norm": 6.219483375549316,
|
| 10100 |
+
"learning_rate": 1.0066894495746384e-06,
|
| 10101 |
+
"loss": 0.0894,
|
| 10102 |
+
"step": 14360
|
| 10103 |
+
},
|
| 10104 |
+
{
|
| 10105 |
+
"epoch": 5.641931684334511,
|
| 10106 |
+
"grad_norm": 1.2812358140945435,
|
| 10107 |
+
"learning_rate": 9.957827383116411e-07,
|
| 10108 |
+
"loss": 0.1408,
|
| 10109 |
+
"step": 14370
|
| 10110 |
+
},
|
| 10111 |
+
{
|
| 10112 |
+
"epoch": 5.645857872006282,
|
| 10113 |
+
"grad_norm": 40.860984802246094,
|
| 10114 |
+
"learning_rate": 9.84876027048644e-07,
|
| 10115 |
+
"loss": 0.2064,
|
| 10116 |
+
"step": 14380
|
| 10117 |
+
},
|
| 10118 |
+
{
|
| 10119 |
+
"epoch": 5.649784059678053,
|
| 10120 |
+
"grad_norm": 0.19107870757579803,
|
| 10121 |
+
"learning_rate": 9.739693157856469e-07,
|
| 10122 |
+
"loss": 0.0781,
|
| 10123 |
+
"step": 14390
|
| 10124 |
+
},
|
| 10125 |
+
{
|
| 10126 |
+
"epoch": 5.6537102473498235,
|
| 10127 |
+
"grad_norm": 1.1489980220794678,
|
| 10128 |
+
"learning_rate": 9.630626045226496e-07,
|
| 10129 |
+
"loss": 0.3186,
|
| 10130 |
+
"step": 14400
|
| 10131 |
+
},
|
| 10132 |
+
{
|
| 10133 |
+
"epoch": 5.657636435021594,
|
| 10134 |
+
"grad_norm": 33.495182037353516,
|
| 10135 |
+
"learning_rate": 9.521558932596524e-07,
|
| 10136 |
+
"loss": 0.0795,
|
| 10137 |
+
"step": 14410
|
| 10138 |
+
},
|
| 10139 |
+
{
|
| 10140 |
+
"epoch": 5.661562622693364,
|
| 10141 |
+
"grad_norm": 0.024215303361415863,
|
| 10142 |
+
"learning_rate": 9.412491819966554e-07,
|
| 10143 |
+
"loss": 0.0617,
|
| 10144 |
+
"step": 14420
|
| 10145 |
+
},
|
| 10146 |
+
{
|
| 10147 |
+
"epoch": 5.665488810365136,
|
| 10148 |
+
"grad_norm": 0.06826794147491455,
|
| 10149 |
+
"learning_rate": 9.303424707336581e-07,
|
| 10150 |
+
"loss": 0.1631,
|
| 10151 |
+
"step": 14430
|
| 10152 |
+
},
|
| 10153 |
+
{
|
| 10154 |
+
"epoch": 5.669414998036906,
|
| 10155 |
+
"grad_norm": 0.21508584916591644,
|
| 10156 |
+
"learning_rate": 9.194357594706609e-07,
|
| 10157 |
+
"loss": 0.0656,
|
| 10158 |
+
"step": 14440
|
| 10159 |
+
},
|
| 10160 |
+
{
|
| 10161 |
+
"epoch": 5.673341185708677,
|
| 10162 |
+
"grad_norm": 0.9320672154426575,
|
| 10163 |
+
"learning_rate": 9.085290482076639e-07,
|
| 10164 |
+
"loss": 0.0274,
|
| 10165 |
+
"step": 14450
|
| 10166 |
+
},
|
| 10167 |
+
{
|
| 10168 |
+
"epoch": 5.677267373380448,
|
| 10169 |
+
"grad_norm": 0.06282981485128403,
|
| 10170 |
+
"learning_rate": 8.976223369446666e-07,
|
| 10171 |
+
"loss": 0.009,
|
| 10172 |
+
"step": 14460
|
| 10173 |
+
},
|
| 10174 |
+
{
|
| 10175 |
+
"epoch": 5.681193561052218,
|
| 10176 |
+
"grad_norm": 0.796981155872345,
|
| 10177 |
+
"learning_rate": 8.867156256816695e-07,
|
| 10178 |
+
"loss": 0.0869,
|
| 10179 |
+
"step": 14470
|
| 10180 |
+
},
|
| 10181 |
+
{
|
| 10182 |
+
"epoch": 5.685119748723989,
|
| 10183 |
+
"grad_norm": 0.47704437375068665,
|
| 10184 |
+
"learning_rate": 8.758089144186723e-07,
|
| 10185 |
+
"loss": 0.0558,
|
| 10186 |
+
"step": 14480
|
| 10187 |
+
},
|
| 10188 |
+
{
|
| 10189 |
+
"epoch": 5.689045936395759,
|
| 10190 |
+
"grad_norm": 0.8715189099311829,
|
| 10191 |
+
"learning_rate": 8.649022031556751e-07,
|
| 10192 |
+
"loss": 0.3915,
|
| 10193 |
+
"step": 14490
|
| 10194 |
+
},
|
| 10195 |
+
{
|
| 10196 |
+
"epoch": 5.692972124067531,
|
| 10197 |
+
"grad_norm": 2.462773561477661,
|
| 10198 |
+
"learning_rate": 8.53995491892678e-07,
|
| 10199 |
+
"loss": 0.2456,
|
| 10200 |
+
"step": 14500
|
| 10201 |
+
},
|
| 10202 |
+
{
|
| 10203 |
+
"epoch": 5.696898311739301,
|
| 10204 |
+
"grad_norm": 0.7116551399230957,
|
| 10205 |
+
"learning_rate": 8.430887806296808e-07,
|
| 10206 |
+
"loss": 0.0251,
|
| 10207 |
+
"step": 14510
|
| 10208 |
+
},
|
| 10209 |
+
{
|
| 10210 |
+
"epoch": 5.7008244994110715,
|
| 10211 |
+
"grad_norm": 1.1840918064117432,
|
| 10212 |
+
"learning_rate": 8.321820693666837e-07,
|
| 10213 |
+
"loss": 0.1322,
|
| 10214 |
+
"step": 14520
|
| 10215 |
+
},
|
| 10216 |
+
{
|
| 10217 |
+
"epoch": 5.704750687082843,
|
| 10218 |
+
"grad_norm": 0.09072090685367584,
|
| 10219 |
+
"learning_rate": 8.212753581036865e-07,
|
| 10220 |
+
"loss": 0.1009,
|
| 10221 |
+
"step": 14530
|
| 10222 |
+
},
|
| 10223 |
+
{
|
| 10224 |
+
"epoch": 5.708676874754613,
|
| 10225 |
+
"grad_norm": 28.93404769897461,
|
| 10226 |
+
"learning_rate": 8.103686468406894e-07,
|
| 10227 |
+
"loss": 0.145,
|
| 10228 |
+
"step": 14540
|
| 10229 |
+
},
|
| 10230 |
+
{
|
| 10231 |
+
"epoch": 5.712603062426384,
|
| 10232 |
+
"grad_norm": 1.9189600944519043,
|
| 10233 |
+
"learning_rate": 7.994619355776922e-07,
|
| 10234 |
+
"loss": 0.0692,
|
| 10235 |
+
"step": 14550
|
| 10236 |
+
},
|
| 10237 |
+
{
|
| 10238 |
+
"epoch": 5.716529250098155,
|
| 10239 |
+
"grad_norm": 0.047376301139593124,
|
| 10240 |
+
"learning_rate": 7.88555224314695e-07,
|
| 10241 |
+
"loss": 0.0679,
|
| 10242 |
+
"step": 14560
|
| 10243 |
+
},
|
| 10244 |
+
{
|
| 10245 |
+
"epoch": 5.7204554377699255,
|
| 10246 |
+
"grad_norm": 0.20065166056156158,
|
| 10247 |
+
"learning_rate": 7.776485130516978e-07,
|
| 10248 |
+
"loss": 0.0101,
|
| 10249 |
+
"step": 14570
|
| 10250 |
+
},
|
| 10251 |
+
{
|
| 10252 |
+
"epoch": 5.724381625441696,
|
| 10253 |
+
"grad_norm": 0.5917192697525024,
|
| 10254 |
+
"learning_rate": 7.667418017887007e-07,
|
| 10255 |
+
"loss": 0.0334,
|
| 10256 |
+
"step": 14580
|
| 10257 |
+
},
|
| 10258 |
+
{
|
| 10259 |
+
"epoch": 5.728307813113467,
|
| 10260 |
+
"grad_norm": 75.4139175415039,
|
| 10261 |
+
"learning_rate": 7.558350905257034e-07,
|
| 10262 |
+
"loss": 0.0821,
|
| 10263 |
+
"step": 14590
|
| 10264 |
+
},
|
| 10265 |
+
{
|
| 10266 |
+
"epoch": 5.732234000785238,
|
| 10267 |
+
"grad_norm": 0.0929323062300682,
|
| 10268 |
+
"learning_rate": 7.449283792627063e-07,
|
| 10269 |
+
"loss": 0.053,
|
| 10270 |
+
"step": 14600
|
| 10271 |
+
},
|
| 10272 |
+
{
|
| 10273 |
+
"epoch": 5.736160188457008,
|
| 10274 |
+
"grad_norm": 0.023365721106529236,
|
| 10275 |
+
"learning_rate": 7.340216679997092e-07,
|
| 10276 |
+
"loss": 0.0522,
|
| 10277 |
+
"step": 14610
|
| 10278 |
+
},
|
| 10279 |
+
{
|
| 10280 |
+
"epoch": 5.740086376128779,
|
| 10281 |
+
"grad_norm": 1.7454125881195068,
|
| 10282 |
+
"learning_rate": 7.23114956736712e-07,
|
| 10283 |
+
"loss": 0.0152,
|
| 10284 |
+
"step": 14620
|
| 10285 |
+
},
|
| 10286 |
+
{
|
| 10287 |
+
"epoch": 5.74401256380055,
|
| 10288 |
+
"grad_norm": 48.125389099121094,
|
| 10289 |
+
"learning_rate": 7.122082454737149e-07,
|
| 10290 |
+
"loss": 0.3991,
|
| 10291 |
+
"step": 14630
|
| 10292 |
+
},
|
| 10293 |
+
{
|
| 10294 |
+
"epoch": 5.74793875147232,
|
| 10295 |
+
"grad_norm": 0.10581380128860474,
|
| 10296 |
+
"learning_rate": 7.013015342107177e-07,
|
| 10297 |
+
"loss": 0.0214,
|
| 10298 |
+
"step": 14640
|
| 10299 |
+
},
|
| 10300 |
+
{
|
| 10301 |
+
"epoch": 5.751864939144091,
|
| 10302 |
+
"grad_norm": 0.009025956504046917,
|
| 10303 |
+
"learning_rate": 6.903948229477205e-07,
|
| 10304 |
+
"loss": 0.0195,
|
| 10305 |
+
"step": 14650
|
| 10306 |
+
},
|
| 10307 |
+
{
|
| 10308 |
+
"epoch": 5.755791126815862,
|
| 10309 |
+
"grad_norm": 3.209620237350464,
|
| 10310 |
+
"learning_rate": 6.794881116847233e-07,
|
| 10311 |
+
"loss": 0.0433,
|
| 10312 |
+
"step": 14660
|
| 10313 |
+
},
|
| 10314 |
+
{
|
| 10315 |
+
"epoch": 5.759717314487633,
|
| 10316 |
+
"grad_norm": 0.2999258041381836,
|
| 10317 |
+
"learning_rate": 6.685814004217262e-07,
|
| 10318 |
+
"loss": 0.1324,
|
| 10319 |
+
"step": 14670
|
| 10320 |
+
},
|
| 10321 |
+
{
|
| 10322 |
+
"epoch": 5.763643502159403,
|
| 10323 |
+
"grad_norm": 52.33720779418945,
|
| 10324 |
+
"learning_rate": 6.576746891587289e-07,
|
| 10325 |
+
"loss": 0.2876,
|
| 10326 |
+
"step": 14680
|
| 10327 |
+
},
|
| 10328 |
+
{
|
| 10329 |
+
"epoch": 5.7675696898311735,
|
| 10330 |
+
"grad_norm": 38.36469650268555,
|
| 10331 |
+
"learning_rate": 6.467679778957319e-07,
|
| 10332 |
+
"loss": 0.3568,
|
| 10333 |
+
"step": 14690
|
| 10334 |
+
},
|
| 10335 |
+
{
|
| 10336 |
+
"epoch": 5.771495877502945,
|
| 10337 |
+
"grad_norm": 3.552603244781494,
|
| 10338 |
+
"learning_rate": 6.358612666327347e-07,
|
| 10339 |
+
"loss": 0.2049,
|
| 10340 |
+
"step": 14700
|
| 10341 |
+
},
|
| 10342 |
+
{
|
| 10343 |
+
"epoch": 5.775422065174715,
|
| 10344 |
+
"grad_norm": 66.29668426513672,
|
| 10345 |
+
"learning_rate": 6.249545553697376e-07,
|
| 10346 |
+
"loss": 0.2132,
|
| 10347 |
+
"step": 14710
|
| 10348 |
+
},
|
| 10349 |
+
{
|
| 10350 |
+
"epoch": 5.779348252846486,
|
| 10351 |
+
"grad_norm": 8.237017631530762,
|
| 10352 |
+
"learning_rate": 6.140478441067403e-07,
|
| 10353 |
+
"loss": 0.0869,
|
| 10354 |
+
"step": 14720
|
| 10355 |
+
},
|
| 10356 |
+
{
|
| 10357 |
+
"epoch": 5.783274440518257,
|
| 10358 |
+
"grad_norm": 1.2616140842437744,
|
| 10359 |
+
"learning_rate": 6.031411328437432e-07,
|
| 10360 |
+
"loss": 0.1083,
|
| 10361 |
+
"step": 14730
|
| 10362 |
+
},
|
| 10363 |
+
{
|
| 10364 |
+
"epoch": 5.7872006281900275,
|
| 10365 |
+
"grad_norm": 0.026556458324193954,
|
| 10366 |
+
"learning_rate": 5.922344215807461e-07,
|
| 10367 |
+
"loss": 0.0232,
|
| 10368 |
+
"step": 14740
|
| 10369 |
+
},
|
| 10370 |
+
{
|
| 10371 |
+
"epoch": 5.791126815861798,
|
| 10372 |
+
"grad_norm": 13.759248733520508,
|
| 10373 |
+
"learning_rate": 5.813277103177488e-07,
|
| 10374 |
+
"loss": 0.0617,
|
| 10375 |
+
"step": 14750
|
| 10376 |
+
},
|
| 10377 |
+
{
|
| 10378 |
+
"epoch": 5.795053003533569,
|
| 10379 |
+
"grad_norm": 0.7599974274635315,
|
| 10380 |
+
"learning_rate": 5.704209990547517e-07,
|
| 10381 |
+
"loss": 0.0713,
|
| 10382 |
+
"step": 14760
|
| 10383 |
+
},
|
| 10384 |
+
{
|
| 10385 |
+
"epoch": 5.79897919120534,
|
| 10386 |
+
"grad_norm": 1.4931432008743286,
|
| 10387 |
+
"learning_rate": 5.595142877917546e-07,
|
| 10388 |
+
"loss": 0.0436,
|
| 10389 |
+
"step": 14770
|
| 10390 |
+
},
|
| 10391 |
+
{
|
| 10392 |
+
"epoch": 5.80290537887711,
|
| 10393 |
+
"grad_norm": 1.4563673734664917,
|
| 10394 |
+
"learning_rate": 5.486075765287574e-07,
|
| 10395 |
+
"loss": 0.0414,
|
| 10396 |
+
"step": 14780
|
| 10397 |
+
},
|
| 10398 |
+
{
|
| 10399 |
+
"epoch": 5.806831566548881,
|
| 10400 |
+
"grad_norm": 0.5742218494415283,
|
| 10401 |
+
"learning_rate": 5.377008652657602e-07,
|
| 10402 |
+
"loss": 0.0353,
|
| 10403 |
+
"step": 14790
|
| 10404 |
+
},
|
| 10405 |
+
{
|
| 10406 |
+
"epoch": 5.810757754220652,
|
| 10407 |
+
"grad_norm": 0.47356608510017395,
|
| 10408 |
+
"learning_rate": 5.267941540027631e-07,
|
| 10409 |
+
"loss": 0.0474,
|
| 10410 |
+
"step": 14800
|
| 10411 |
+
},
|
| 10412 |
+
{
|
| 10413 |
+
"epoch": 5.814683941892422,
|
| 10414 |
+
"grad_norm": 42.07039260864258,
|
| 10415 |
+
"learning_rate": 5.158874427397658e-07,
|
| 10416 |
+
"loss": 0.205,
|
| 10417 |
+
"step": 14810
|
| 10418 |
+
},
|
| 10419 |
+
{
|
| 10420 |
+
"epoch": 5.818610129564194,
|
| 10421 |
+
"grad_norm": 0.021612640470266342,
|
| 10422 |
+
"learning_rate": 5.049807314767687e-07,
|
| 10423 |
+
"loss": 0.2927,
|
| 10424 |
+
"step": 14820
|
| 10425 |
+
},
|
| 10426 |
+
{
|
| 10427 |
+
"epoch": 5.822536317235964,
|
| 10428 |
+
"grad_norm": 15.521261215209961,
|
| 10429 |
+
"learning_rate": 4.940740202137715e-07,
|
| 10430 |
+
"loss": 0.1204,
|
| 10431 |
+
"step": 14830
|
| 10432 |
+
},
|
| 10433 |
+
{
|
| 10434 |
+
"epoch": 5.8264625049077345,
|
| 10435 |
+
"grad_norm": 1.3707491159439087,
|
| 10436 |
+
"learning_rate": 4.831673089507743e-07,
|
| 10437 |
+
"loss": 0.0651,
|
| 10438 |
+
"step": 14840
|
| 10439 |
+
},
|
| 10440 |
+
{
|
| 10441 |
+
"epoch": 5.830388692579505,
|
| 10442 |
+
"grad_norm": 1.2329983711242676,
|
| 10443 |
+
"learning_rate": 4.7226059768777727e-07,
|
| 10444 |
+
"loss": 0.1242,
|
| 10445 |
+
"step": 14850
|
| 10446 |
+
},
|
| 10447 |
+
{
|
| 10448 |
+
"epoch": 5.834314880251276,
|
| 10449 |
+
"grad_norm": 39.72314453125,
|
| 10450 |
+
"learning_rate": 4.6135388642478003e-07,
|
| 10451 |
+
"loss": 0.3012,
|
| 10452 |
+
"step": 14860
|
| 10453 |
+
},
|
| 10454 |
+
{
|
| 10455 |
+
"epoch": 5.838241067923047,
|
| 10456 |
+
"grad_norm": 14.552694320678711,
|
| 10457 |
+
"learning_rate": 4.504471751617829e-07,
|
| 10458 |
+
"loss": 0.1662,
|
| 10459 |
+
"step": 14870
|
| 10460 |
+
},
|
| 10461 |
+
{
|
| 10462 |
+
"epoch": 5.842167255594817,
|
| 10463 |
+
"grad_norm": 0.45098528265953064,
|
| 10464 |
+
"learning_rate": 4.395404638987857e-07,
|
| 10465 |
+
"loss": 0.0267,
|
| 10466 |
+
"step": 14880
|
| 10467 |
+
},
|
| 10468 |
+
{
|
| 10469 |
+
"epoch": 5.8460934432665885,
|
| 10470 |
+
"grad_norm": 12.383710861206055,
|
| 10471 |
+
"learning_rate": 4.2863375263578854e-07,
|
| 10472 |
+
"loss": 0.0692,
|
| 10473 |
+
"step": 14890
|
| 10474 |
+
},
|
| 10475 |
+
{
|
| 10476 |
+
"epoch": 5.850019630938359,
|
| 10477 |
+
"grad_norm": 0.05478942394256592,
|
| 10478 |
+
"learning_rate": 4.1772704137279136e-07,
|
| 10479 |
+
"loss": 0.0624,
|
| 10480 |
+
"step": 14900
|
| 10481 |
+
},
|
| 10482 |
+
{
|
| 10483 |
+
"epoch": 5.853945818610129,
|
| 10484 |
+
"grad_norm": 0.025708330795168877,
|
| 10485 |
+
"learning_rate": 4.068203301097943e-07,
|
| 10486 |
+
"loss": 0.239,
|
| 10487 |
+
"step": 14910
|
| 10488 |
+
},
|
| 10489 |
+
{
|
| 10490 |
+
"epoch": 5.8578720062819,
|
| 10491 |
+
"grad_norm": 0.9735015630722046,
|
| 10492 |
+
"learning_rate": 3.959136188467971e-07,
|
| 10493 |
+
"loss": 0.0631,
|
| 10494 |
+
"step": 14920
|
| 10495 |
+
},
|
| 10496 |
+
{
|
| 10497 |
+
"epoch": 5.861798193953671,
|
| 10498 |
+
"grad_norm": 1.240945816040039,
|
| 10499 |
+
"learning_rate": 3.850069075837999e-07,
|
| 10500 |
+
"loss": 0.046,
|
| 10501 |
+
"step": 14930
|
| 10502 |
+
},
|
| 10503 |
+
{
|
| 10504 |
+
"epoch": 5.865724381625442,
|
| 10505 |
+
"grad_norm": 0.002003891160711646,
|
| 10506 |
+
"learning_rate": 3.7410019632080273e-07,
|
| 10507 |
+
"loss": 0.0147,
|
| 10508 |
+
"step": 14940
|
| 10509 |
+
},
|
| 10510 |
+
{
|
| 10511 |
+
"epoch": 5.869650569297212,
|
| 10512 |
+
"grad_norm": Infinity,
|
| 10513 |
+
"learning_rate": 3.631934850578056e-07,
|
| 10514 |
+
"loss": 0.6244,
|
| 10515 |
+
"step": 14950
|
| 10516 |
+
},
|
| 10517 |
+
{
|
| 10518 |
+
"epoch": 5.873576756968983,
|
| 10519 |
+
"grad_norm": 0.04449770227074623,
|
| 10520 |
+
"learning_rate": 3.522867737948084e-07,
|
| 10521 |
+
"loss": 0.0087,
|
| 10522 |
+
"step": 14960
|
| 10523 |
+
},
|
| 10524 |
+
{
|
| 10525 |
+
"epoch": 5.877502944640754,
|
| 10526 |
+
"grad_norm": 0.5825850367546082,
|
| 10527 |
+
"learning_rate": 3.4138006253181124e-07,
|
| 10528 |
+
"loss": 0.0921,
|
| 10529 |
+
"step": 14970
|
| 10530 |
+
},
|
| 10531 |
+
{
|
| 10532 |
+
"epoch": 5.881429132312524,
|
| 10533 |
+
"grad_norm": 2.847191095352173,
|
| 10534 |
+
"learning_rate": 3.3047335126881406e-07,
|
| 10535 |
+
"loss": 0.0513,
|
| 10536 |
+
"step": 14980
|
| 10537 |
+
},
|
| 10538 |
+
{
|
| 10539 |
+
"epoch": 5.885355319984296,
|
| 10540 |
+
"grad_norm": 35.19565200805664,
|
| 10541 |
+
"learning_rate": 3.1956664000581693e-07,
|
| 10542 |
+
"loss": 0.45,
|
| 10543 |
+
"step": 14990
|
| 10544 |
+
},
|
| 10545 |
+
{
|
| 10546 |
+
"epoch": 5.889281507656066,
|
| 10547 |
+
"grad_norm": 53.325225830078125,
|
| 10548 |
+
"learning_rate": 3.0865992874281975e-07,
|
| 10549 |
+
"loss": 0.0693,
|
| 10550 |
+
"step": 15000
|
| 10551 |
+
},
|
| 10552 |
+
{
|
| 10553 |
+
"epoch": 5.8932076953278365,
|
| 10554 |
+
"grad_norm": 15.387211799621582,
|
| 10555 |
+
"learning_rate": 2.9775321747982256e-07,
|
| 10556 |
+
"loss": 0.1537,
|
| 10557 |
+
"step": 15010
|
| 10558 |
+
},
|
| 10559 |
+
{
|
| 10560 |
+
"epoch": 5.897133882999608,
|
| 10561 |
+
"grad_norm": 6.742716312408447,
|
| 10562 |
+
"learning_rate": 2.868465062168254e-07,
|
| 10563 |
+
"loss": 0.1971,
|
| 10564 |
+
"step": 15020
|
| 10565 |
+
},
|
| 10566 |
+
{
|
| 10567 |
+
"epoch": 5.901060070671378,
|
| 10568 |
+
"grad_norm": 0.0015198083128780127,
|
| 10569 |
+
"learning_rate": 2.759397949538283e-07,
|
| 10570 |
+
"loss": 0.2437,
|
| 10571 |
+
"step": 15030
|
| 10572 |
+
},
|
| 10573 |
+
{
|
| 10574 |
+
"epoch": 5.904986258343149,
|
| 10575 |
+
"grad_norm": 1.0140429735183716,
|
| 10576 |
+
"learning_rate": 2.650330836908311e-07,
|
| 10577 |
+
"loss": 0.0108,
|
| 10578 |
+
"step": 15040
|
| 10579 |
+
},
|
| 10580 |
+
{
|
| 10581 |
+
"epoch": 5.908912446014919,
|
| 10582 |
+
"grad_norm": 0.36821112036705017,
|
| 10583 |
+
"learning_rate": 2.5412637242783394e-07,
|
| 10584 |
+
"loss": 0.1091,
|
| 10585 |
+
"step": 15050
|
| 10586 |
+
},
|
| 10587 |
+
{
|
| 10588 |
+
"epoch": 5.9128386336866905,
|
| 10589 |
+
"grad_norm": 63.82198715209961,
|
| 10590 |
+
"learning_rate": 2.4321966116483676e-07,
|
| 10591 |
+
"loss": 0.3013,
|
| 10592 |
+
"step": 15060
|
| 10593 |
+
},
|
| 10594 |
+
{
|
| 10595 |
+
"epoch": 5.916764821358461,
|
| 10596 |
+
"grad_norm": 3.619304895401001,
|
| 10597 |
+
"learning_rate": 2.323129499018396e-07,
|
| 10598 |
+
"loss": 0.0495,
|
| 10599 |
+
"step": 15070
|
| 10600 |
+
},
|
| 10601 |
+
{
|
| 10602 |
+
"epoch": 5.920691009030231,
|
| 10603 |
+
"grad_norm": 1.9223240613937378,
|
| 10604 |
+
"learning_rate": 2.2140623863884245e-07,
|
| 10605 |
+
"loss": 0.0587,
|
| 10606 |
+
"step": 15080
|
| 10607 |
+
},
|
| 10608 |
+
{
|
| 10609 |
+
"epoch": 5.924617196702003,
|
| 10610 |
+
"grad_norm": 0.054315753281116486,
|
| 10611 |
+
"learning_rate": 2.1049952737584526e-07,
|
| 10612 |
+
"loss": 0.1429,
|
| 10613 |
+
"step": 15090
|
| 10614 |
+
},
|
| 10615 |
+
{
|
| 10616 |
+
"epoch": 5.928543384373773,
|
| 10617 |
+
"grad_norm": 0.6880110502243042,
|
| 10618 |
+
"learning_rate": 1.995928161128481e-07,
|
| 10619 |
+
"loss": 0.0702,
|
| 10620 |
+
"step": 15100
|
| 10621 |
+
},
|
| 10622 |
+
{
|
| 10623 |
+
"epoch": 5.932469572045544,
|
| 10624 |
+
"grad_norm": 1.5579729080200195,
|
| 10625 |
+
"learning_rate": 1.8868610484985093e-07,
|
| 10626 |
+
"loss": 0.0362,
|
| 10627 |
+
"step": 15110
|
| 10628 |
+
},
|
| 10629 |
+
{
|
| 10630 |
+
"epoch": 5.936395759717314,
|
| 10631 |
+
"grad_norm": 7.767218112945557,
|
| 10632 |
+
"learning_rate": 1.777793935868538e-07,
|
| 10633 |
+
"loss": 0.1695,
|
| 10634 |
+
"step": 15120
|
| 10635 |
+
},
|
| 10636 |
+
{
|
| 10637 |
+
"epoch": 5.940321947389085,
|
| 10638 |
+
"grad_norm": 2.507051706314087,
|
| 10639 |
+
"learning_rate": 1.6687268232385661e-07,
|
| 10640 |
+
"loss": 0.092,
|
| 10641 |
+
"step": 15130
|
| 10642 |
+
},
|
| 10643 |
+
{
|
| 10644 |
+
"epoch": 5.944248135060856,
|
| 10645 |
+
"grad_norm": 5.474608421325684,
|
| 10646 |
+
"learning_rate": 1.5596597106085946e-07,
|
| 10647 |
+
"loss": 0.0219,
|
| 10648 |
+
"step": 15140
|
| 10649 |
+
},
|
| 10650 |
+
{
|
| 10651 |
+
"epoch": 5.948174322732626,
|
| 10652 |
+
"grad_norm": 2.6450958251953125,
|
| 10653 |
+
"learning_rate": 1.4505925979786228e-07,
|
| 10654 |
+
"loss": 0.0861,
|
| 10655 |
+
"step": 15150
|
| 10656 |
+
},
|
| 10657 |
+
{
|
| 10658 |
+
"epoch": 5.952100510404398,
|
| 10659 |
+
"grad_norm": 0.38818231225013733,
|
| 10660 |
+
"learning_rate": 1.3415254853486512e-07,
|
| 10661 |
+
"loss": 0.023,
|
| 10662 |
+
"step": 15160
|
| 10663 |
+
},
|
| 10664 |
+
{
|
| 10665 |
+
"epoch": 5.956026698076168,
|
| 10666 |
+
"grad_norm": 3.4643454551696777,
|
| 10667 |
+
"learning_rate": 1.2324583727186794e-07,
|
| 10668 |
+
"loss": 0.1185,
|
| 10669 |
+
"step": 15170
|
| 10670 |
+
},
|
| 10671 |
+
{
|
| 10672 |
+
"epoch": 5.9599528857479385,
|
| 10673 |
+
"grad_norm": 0.08205350488424301,
|
| 10674 |
+
"learning_rate": 1.123391260088708e-07,
|
| 10675 |
+
"loss": 0.0154,
|
| 10676 |
+
"step": 15180
|
| 10677 |
+
},
|
| 10678 |
+
{
|
| 10679 |
+
"epoch": 5.96387907341971,
|
| 10680 |
+
"grad_norm": 19.57013702392578,
|
| 10681 |
+
"learning_rate": 1.0143241474587364e-07,
|
| 10682 |
+
"loss": 0.1135,
|
| 10683 |
+
"step": 15190
|
| 10684 |
+
},
|
| 10685 |
+
{
|
| 10686 |
+
"epoch": 5.96780526109148,
|
| 10687 |
+
"grad_norm": 0.3869020938873291,
|
| 10688 |
+
"learning_rate": 9.052570348287647e-08,
|
| 10689 |
+
"loss": 0.1303,
|
| 10690 |
+
"step": 15200
|
| 10691 |
+
},
|
| 10692 |
+
{
|
| 10693 |
+
"epoch": 5.971731448763251,
|
| 10694 |
+
"grad_norm": 0.9908976554870605,
|
| 10695 |
+
"learning_rate": 7.96189922198793e-08,
|
| 10696 |
+
"loss": 0.2838,
|
| 10697 |
+
"step": 15210
|
| 10698 |
+
},
|
| 10699 |
+
{
|
| 10700 |
+
"epoch": 5.975657636435022,
|
| 10701 |
+
"grad_norm": 0.01841503009200096,
|
| 10702 |
+
"learning_rate": 6.871228095688215e-08,
|
| 10703 |
+
"loss": 0.2122,
|
| 10704 |
+
"step": 15220
|
| 10705 |
+
},
|
| 10706 |
+
{
|
| 10707 |
+
"epoch": 5.9795838241067925,
|
| 10708 |
+
"grad_norm": 48.305274963378906,
|
| 10709 |
+
"learning_rate": 5.780556969388497e-08,
|
| 10710 |
+
"loss": 0.0936,
|
| 10711 |
+
"step": 15230
|
| 10712 |
+
},
|
| 10713 |
+
{
|
| 10714 |
+
"epoch": 5.983510011778563,
|
| 10715 |
+
"grad_norm": 3.678730010986328,
|
| 10716 |
+
"learning_rate": 4.689885843088781e-08,
|
| 10717 |
+
"loss": 0.0215,
|
| 10718 |
+
"step": 15240
|
| 10719 |
+
},
|
| 10720 |
+
{
|
| 10721 |
+
"epoch": 5.987436199450333,
|
| 10722 |
+
"grad_norm": 0.3298221826553345,
|
| 10723 |
+
"learning_rate": 3.599214716789064e-08,
|
| 10724 |
+
"loss": 0.1976,
|
| 10725 |
+
"step": 15250
|
| 10726 |
+
},
|
| 10727 |
+
{
|
| 10728 |
+
"epoch": 5.991362387122105,
|
| 10729 |
+
"grad_norm": 4.3517255783081055,
|
| 10730 |
+
"learning_rate": 2.508543590489348e-08,
|
| 10731 |
+
"loss": 0.0784,
|
| 10732 |
+
"step": 15260
|
| 10733 |
+
},
|
| 10734 |
+
{
|
| 10735 |
+
"epoch": 5.995288574793875,
|
| 10736 |
+
"grad_norm": 0.4186241924762726,
|
| 10737 |
+
"learning_rate": 1.4178724641896315e-08,
|
| 10738 |
+
"loss": 0.2141,
|
| 10739 |
+
"step": 15270
|
| 10740 |
+
},
|
| 10741 |
+
{
|
| 10742 |
+
"epoch": 5.999214762465646,
|
| 10743 |
+
"grad_norm": 2.5911920070648193,
|
| 10744 |
+
"learning_rate": 3.272013378899149e-09,
|
| 10745 |
+
"loss": 0.0982,
|
| 10746 |
+
"step": 15280
|
| 10747 |
+
},
|
| 10748 |
+
{
|
| 10749 |
+
"epoch": 6.0,
|
| 10750 |
+
"eval_loss": 0.19693690538406372,
|
| 10751 |
+
"eval_runtime": 11.3325,
|
| 10752 |
+
"eval_samples_per_second": 199.78,
|
| 10753 |
+
"eval_steps_per_second": 24.972,
|
| 10754 |
+
"step": 15282
|
| 10755 |
}
|
| 10756 |
],
|
| 10757 |
"logging_steps": 10,
|
|
|
|
| 10766 |
"early_stopping_threshold": 0.0
|
| 10767 |
},
|
| 10768 |
"attributes": {
|
| 10769 |
+
"early_stopping_patience_counter": 0
|
| 10770 |
}
|
| 10771 |
},
|
| 10772 |
"TrainerControl": {
|
|
|
|
| 10775 |
"should_evaluate": false,
|
| 10776 |
"should_log": false,
|
| 10777 |
"should_save": true,
|
| 10778 |
+
"should_training_stop": true
|
| 10779 |
},
|
| 10780 |
"attributes": {}
|
| 10781 |
}
|
| 10782 |
},
|
| 10783 |
+
"total_flos": 3.4143722700698976e+16,
|
| 10784 |
"train_batch_size": 8,
|
| 10785 |
"trial_name": null,
|
| 10786 |
"trial_params": null
|