Update to deberta-featattn-20260623-225422 (RAID AUROC 0.982)
Browse files- checkpoint-5500/model.safetensors +3 -0
- checkpoint-5500/optimizer.pt +3 -0
- checkpoint-5500/rng_state.pth +3 -0
- checkpoint-5500/scheduler.pt +3 -0
- checkpoint-5500/trainer_state.json +1726 -0
- checkpoint-5500/training_args.bin +3 -0
- checkpoint-5626/model.safetensors +3 -0
- checkpoint-5626/optimizer.pt +3 -0
- checkpoint-5626/rng_state.pth +3 -0
- checkpoint-5626/scheduler.pt +3 -0
- checkpoint-5626/trainer_state.json +1774 -0
- checkpoint-5626/training_args.bin +3 -0
- meta.json +2 -2
- onnx/detector_config.json +10 -0
- onnx/meta.json +41 -0
- onnx/model_fp16.onnx +1 -1
- onnx/model_fp32.onnx +1 -1
- onnx/model_int8.onnx +1 -1
- onnx/model_q4.onnx +1 -1
- onnx/raid_results.json +153 -0
- onnx/raid_submission.json +0 -0
- pytorch_model.bin +3 -0
- raid_results.json +153 -0
checkpoint-5500/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2fe3b8087441254064eef4dd6b0f784ea859a81c1f82c6f95d267dff205e2014
|
| 3 |
+
size 736795940
|
checkpoint-5500/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2085e2dc024122d4670129270c990948737f20ceeee05e9a20c058e1300239af
|
| 3 |
+
size 1473711115
|
checkpoint-5500/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b1c70b20302e039ff922ef92da23103cbd68279d464265f819dc67cd09814988
|
| 3 |
+
size 14391
|
checkpoint-5500/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bc6aef81b6056fc9eeb9dde4ada035fd61be9ef8265329b42c068420e3be8d7b
|
| 3 |
+
size 1529
|
checkpoint-5500/trainer_state.json
ADDED
|
@@ -0,0 +1,1726 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": 5500,
|
| 3 |
+
"best_metric": 0.9973359040925472,
|
| 4 |
+
"best_model_checkpoint": "/Users/anudit/Documents/GitHub/slopdetector/checkpoints/deberta-featattn-20260623-225422/checkpoint-5500",
|
| 5 |
+
"epoch": 0.9776039815143974,
|
| 6 |
+
"eval_steps": 500,
|
| 7 |
+
"global_step": 5500,
|
| 8 |
+
"is_hyper_param_search": false,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"epoch": 0.004443654461429079,
|
| 14 |
+
"grad_norm": 3.013444662094116,
|
| 15 |
+
"learning_rate": 1.4201183431952664e-06,
|
| 16 |
+
"loss": 0.5004017639160157,
|
| 17 |
+
"step": 25
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"epoch": 0.008887308922858158,
|
| 21 |
+
"grad_norm": 2.297563076019287,
|
| 22 |
+
"learning_rate": 2.8994082840236688e-06,
|
| 23 |
+
"loss": 0.3888048934936523,
|
| 24 |
+
"step": 50
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"epoch": 0.013330963384287239,
|
| 28 |
+
"grad_norm": 1.134047508239746,
|
| 29 |
+
"learning_rate": 4.3786982248520715e-06,
|
| 30 |
+
"loss": 0.2801882553100586,
|
| 31 |
+
"step": 75
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
"epoch": 0.017774617845716316,
|
| 35 |
+
"grad_norm": 0.9834569096565247,
|
| 36 |
+
"learning_rate": 5.857988165680474e-06,
|
| 37 |
+
"loss": 0.24047454833984375,
|
| 38 |
+
"step": 100
|
| 39 |
+
},
|
| 40 |
+
{
|
| 41 |
+
"epoch": 0.022218272307145397,
|
| 42 |
+
"grad_norm": 1.117211103439331,
|
| 43 |
+
"learning_rate": 7.337278106508876e-06,
|
| 44 |
+
"loss": 0.2291146469116211,
|
| 45 |
+
"step": 125
|
| 46 |
+
},
|
| 47 |
+
{
|
| 48 |
+
"epoch": 0.026661926768574477,
|
| 49 |
+
"grad_norm": 0.8506172895431519,
|
| 50 |
+
"learning_rate": 8.816568047337279e-06,
|
| 51 |
+
"loss": 0.1967698097229004,
|
| 52 |
+
"step": 150
|
| 53 |
+
},
|
| 54 |
+
{
|
| 55 |
+
"epoch": 0.031105581230003555,
|
| 56 |
+
"grad_norm": 0.7021474242210388,
|
| 57 |
+
"learning_rate": 1.029585798816568e-05,
|
| 58 |
+
"loss": 0.17146373748779298,
|
| 59 |
+
"step": 175
|
| 60 |
+
},
|
| 61 |
+
{
|
| 62 |
+
"epoch": 0.03554923569143263,
|
| 63 |
+
"grad_norm": 1.2111107110977173,
|
| 64 |
+
"learning_rate": 1.1775147928994083e-05,
|
| 65 |
+
"loss": 0.13825268745422364,
|
| 66 |
+
"step": 200
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"epoch": 0.03999289015286171,
|
| 70 |
+
"grad_norm": 1.9403120279312134,
|
| 71 |
+
"learning_rate": 1.3254437869822488e-05,
|
| 72 |
+
"loss": 0.12618659019470216,
|
| 73 |
+
"step": 225
|
| 74 |
+
},
|
| 75 |
+
{
|
| 76 |
+
"epoch": 0.04443654461429079,
|
| 77 |
+
"grad_norm": 1.8931593894958496,
|
| 78 |
+
"learning_rate": 1.4733727810650888e-05,
|
| 79 |
+
"loss": 0.10122986793518067,
|
| 80 |
+
"step": 250
|
| 81 |
+
},
|
| 82 |
+
{
|
| 83 |
+
"epoch": 0.048880199075719874,
|
| 84 |
+
"grad_norm": 1.8619073629379272,
|
| 85 |
+
"learning_rate": 1.621301775147929e-05,
|
| 86 |
+
"loss": 0.07919074535369873,
|
| 87 |
+
"step": 275
|
| 88 |
+
},
|
| 89 |
+
{
|
| 90 |
+
"epoch": 0.053323853537148955,
|
| 91 |
+
"grad_norm": 1.9105793237686157,
|
| 92 |
+
"learning_rate": 1.7692307692307694e-05,
|
| 93 |
+
"loss": 0.08386680603027344,
|
| 94 |
+
"step": 300
|
| 95 |
+
},
|
| 96 |
+
{
|
| 97 |
+
"epoch": 0.05776750799857803,
|
| 98 |
+
"grad_norm": 2.0332772731781006,
|
| 99 |
+
"learning_rate": 1.9171597633136098e-05,
|
| 100 |
+
"loss": 0.08702397346496582,
|
| 101 |
+
"step": 325
|
| 102 |
+
},
|
| 103 |
+
{
|
| 104 |
+
"epoch": 0.06221116246000711,
|
| 105 |
+
"grad_norm": 0.9020377993583679,
|
| 106 |
+
"learning_rate": 1.995839636913767e-05,
|
| 107 |
+
"loss": 0.06874918460845947,
|
| 108 |
+
"step": 350
|
| 109 |
+
},
|
| 110 |
+
{
|
| 111 |
+
"epoch": 0.06665481692143618,
|
| 112 |
+
"grad_norm": 1.6216212511062622,
|
| 113 |
+
"learning_rate": 1.9863842662632376e-05,
|
| 114 |
+
"loss": 0.06805606842041016,
|
| 115 |
+
"step": 375
|
| 116 |
+
},
|
| 117 |
+
{
|
| 118 |
+
"epoch": 0.07109847138286526,
|
| 119 |
+
"grad_norm": 1.7693337202072144,
|
| 120 |
+
"learning_rate": 1.9769288956127082e-05,
|
| 121 |
+
"loss": 0.06352178573608398,
|
| 122 |
+
"step": 400
|
| 123 |
+
},
|
| 124 |
+
{
|
| 125 |
+
"epoch": 0.07554212584429434,
|
| 126 |
+
"grad_norm": 1.6724389791488647,
|
| 127 |
+
"learning_rate": 1.9674735249621784e-05,
|
| 128 |
+
"loss": 0.0673055648803711,
|
| 129 |
+
"step": 425
|
| 130 |
+
},
|
| 131 |
+
{
|
| 132 |
+
"epoch": 0.07998578030572343,
|
| 133 |
+
"grad_norm": 0.5278561115264893,
|
| 134 |
+
"learning_rate": 1.9580181543116493e-05,
|
| 135 |
+
"loss": 0.06466075897216797,
|
| 136 |
+
"step": 450
|
| 137 |
+
},
|
| 138 |
+
{
|
| 139 |
+
"epoch": 0.0844294347671525,
|
| 140 |
+
"grad_norm": 1.7042737007141113,
|
| 141 |
+
"learning_rate": 1.9485627836611195e-05,
|
| 142 |
+
"loss": 0.0630407428741455,
|
| 143 |
+
"step": 475
|
| 144 |
+
},
|
| 145 |
+
{
|
| 146 |
+
"epoch": 0.08887308922858159,
|
| 147 |
+
"grad_norm": 0.3513544797897339,
|
| 148 |
+
"learning_rate": 1.93910741301059e-05,
|
| 149 |
+
"loss": 0.062327189445495604,
|
| 150 |
+
"step": 500
|
| 151 |
+
},
|
| 152 |
+
{
|
| 153 |
+
"epoch": 0.08887308922858159,
|
| 154 |
+
"eval_accuracy": 0.848,
|
| 155 |
+
"eval_auroc": 0.9866655199587185,
|
| 156 |
+
"eval_f1": 0.8685121107266436,
|
| 157 |
+
"eval_loss": 0.054977674037218094,
|
| 158 |
+
"eval_runtime": 39.8938,
|
| 159 |
+
"eval_samples_per_second": 50.133,
|
| 160 |
+
"eval_steps_per_second": 1.579,
|
| 161 |
+
"eval_tpr_at_fpr1": 0.7455268389662028,
|
| 162 |
+
"eval_tpr_at_fpr5": 0.937375745526839,
|
| 163 |
+
"step": 500
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"epoch": 0.09331674369001067,
|
| 167 |
+
"grad_norm": 2.1725878715515137,
|
| 168 |
+
"learning_rate": 1.9296520423600606e-05,
|
| 169 |
+
"loss": 0.05081462860107422,
|
| 170 |
+
"step": 525
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"epoch": 0.09776039815143975,
|
| 174 |
+
"grad_norm": 2.587542772293091,
|
| 175 |
+
"learning_rate": 1.9201966717095312e-05,
|
| 176 |
+
"loss": 0.06098108291625977,
|
| 177 |
+
"step": 550
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"epoch": 0.10220405261286883,
|
| 181 |
+
"grad_norm": 1.1815265417099,
|
| 182 |
+
"learning_rate": 1.9107413010590018e-05,
|
| 183 |
+
"loss": 0.04866991996765137,
|
| 184 |
+
"step": 575
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"epoch": 0.10664770707429791,
|
| 188 |
+
"grad_norm": 1.140872597694397,
|
| 189 |
+
"learning_rate": 1.901285930408472e-05,
|
| 190 |
+
"loss": 0.06058640956878662,
|
| 191 |
+
"step": 600
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"epoch": 0.11109136153572698,
|
| 195 |
+
"grad_norm": 1.164772868156433,
|
| 196 |
+
"learning_rate": 1.891830559757943e-05,
|
| 197 |
+
"loss": 0.0485923957824707,
|
| 198 |
+
"step": 625
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"epoch": 0.11553501599715606,
|
| 202 |
+
"grad_norm": 2.076003074645996,
|
| 203 |
+
"learning_rate": 1.882375189107413e-05,
|
| 204 |
+
"loss": 0.05198529243469238,
|
| 205 |
+
"step": 650
|
| 206 |
+
},
|
| 207 |
+
{
|
| 208 |
+
"epoch": 0.11997867045858514,
|
| 209 |
+
"grad_norm": 2.8677966594696045,
|
| 210 |
+
"learning_rate": 1.8729198184568836e-05,
|
| 211 |
+
"loss": 0.05215679168701172,
|
| 212 |
+
"step": 675
|
| 213 |
+
},
|
| 214 |
+
{
|
| 215 |
+
"epoch": 0.12442232492001422,
|
| 216 |
+
"grad_norm": 1.243391752243042,
|
| 217 |
+
"learning_rate": 1.8634644478063542e-05,
|
| 218 |
+
"loss": 0.046974472999572754,
|
| 219 |
+
"step": 700
|
| 220 |
+
},
|
| 221 |
+
{
|
| 222 |
+
"epoch": 0.12886597938144329,
|
| 223 |
+
"grad_norm": 1.970794916152954,
|
| 224 |
+
"learning_rate": 1.8540090771558244e-05,
|
| 225 |
+
"loss": 0.051630439758300783,
|
| 226 |
+
"step": 725
|
| 227 |
+
},
|
| 228 |
+
{
|
| 229 |
+
"epoch": 0.13330963384287237,
|
| 230 |
+
"grad_norm": 1.031387448310852,
|
| 231 |
+
"learning_rate": 1.8445537065052953e-05,
|
| 232 |
+
"loss": 0.04577981948852539,
|
| 233 |
+
"step": 750
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"epoch": 0.13775328830430145,
|
| 237 |
+
"grad_norm": 1.4441957473754883,
|
| 238 |
+
"learning_rate": 1.8350983358547655e-05,
|
| 239 |
+
"loss": 0.05677220821380615,
|
| 240 |
+
"step": 775
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"epoch": 0.14219694276573053,
|
| 244 |
+
"grad_norm": 1.2302734851837158,
|
| 245 |
+
"learning_rate": 1.825642965204236e-05,
|
| 246 |
+
"loss": 0.043911681175231934,
|
| 247 |
+
"step": 800
|
| 248 |
+
},
|
| 249 |
+
{
|
| 250 |
+
"epoch": 0.1466405972271596,
|
| 251 |
+
"grad_norm": 0.9389927983283997,
|
| 252 |
+
"learning_rate": 1.8161875945537066e-05,
|
| 253 |
+
"loss": 0.04272346019744873,
|
| 254 |
+
"step": 825
|
| 255 |
+
},
|
| 256 |
+
{
|
| 257 |
+
"epoch": 0.1510842516885887,
|
| 258 |
+
"grad_norm": 1.342290997505188,
|
| 259 |
+
"learning_rate": 1.8067322239031772e-05,
|
| 260 |
+
"loss": 0.054392943382263186,
|
| 261 |
+
"step": 850
|
| 262 |
+
},
|
| 263 |
+
{
|
| 264 |
+
"epoch": 0.15552790615001777,
|
| 265 |
+
"grad_norm": 2.6409666538238525,
|
| 266 |
+
"learning_rate": 1.7972768532526477e-05,
|
| 267 |
+
"loss": 0.04197061061859131,
|
| 268 |
+
"step": 875
|
| 269 |
+
},
|
| 270 |
+
{
|
| 271 |
+
"epoch": 0.15997156061144685,
|
| 272 |
+
"grad_norm": 1.1038918495178223,
|
| 273 |
+
"learning_rate": 1.787821482602118e-05,
|
| 274 |
+
"loss": 0.03587212562561035,
|
| 275 |
+
"step": 900
|
| 276 |
+
},
|
| 277 |
+
{
|
| 278 |
+
"epoch": 0.16441521507287593,
|
| 279 |
+
"grad_norm": 1.414070725440979,
|
| 280 |
+
"learning_rate": 1.778366111951589e-05,
|
| 281 |
+
"loss": 0.04636185646057129,
|
| 282 |
+
"step": 925
|
| 283 |
+
},
|
| 284 |
+
{
|
| 285 |
+
"epoch": 0.168858869534305,
|
| 286 |
+
"grad_norm": 2.164773941040039,
|
| 287 |
+
"learning_rate": 1.768910741301059e-05,
|
| 288 |
+
"loss": 0.044623188972473145,
|
| 289 |
+
"step": 950
|
| 290 |
+
},
|
| 291 |
+
{
|
| 292 |
+
"epoch": 0.1733025239957341,
|
| 293 |
+
"grad_norm": 2.06410813331604,
|
| 294 |
+
"learning_rate": 1.7594553706505296e-05,
|
| 295 |
+
"loss": 0.038699045181274414,
|
| 296 |
+
"step": 975
|
| 297 |
+
},
|
| 298 |
+
{
|
| 299 |
+
"epoch": 0.17774617845716317,
|
| 300 |
+
"grad_norm": 1.53926420211792,
|
| 301 |
+
"learning_rate": 1.7500000000000002e-05,
|
| 302 |
+
"loss": 0.038100283145904544,
|
| 303 |
+
"step": 1000
|
| 304 |
+
},
|
| 305 |
+
{
|
| 306 |
+
"epoch": 0.17774617845716317,
|
| 307 |
+
"eval_accuracy": 0.89,
|
| 308 |
+
"eval_auroc": 0.9912356844846415,
|
| 309 |
+
"eval_f1": 0.9012567324955117,
|
| 310 |
+
"eval_loss": 0.042472898960113525,
|
| 311 |
+
"eval_runtime": 38.57,
|
| 312 |
+
"eval_samples_per_second": 51.854,
|
| 313 |
+
"eval_steps_per_second": 1.633,
|
| 314 |
+
"eval_tpr_at_fpr1": 0.8230616302186878,
|
| 315 |
+
"eval_tpr_at_fpr5": 0.952286282306163,
|
| 316 |
+
"step": 1000
|
| 317 |
+
},
|
| 318 |
+
{
|
| 319 |
+
"epoch": 0.18218983291859225,
|
| 320 |
+
"grad_norm": 0.9646220803260803,
|
| 321 |
+
"learning_rate": 1.7405446293494704e-05,
|
| 322 |
+
"loss": 0.03173836708068847,
|
| 323 |
+
"step": 1025
|
| 324 |
+
},
|
| 325 |
+
{
|
| 326 |
+
"epoch": 0.18663348738002133,
|
| 327 |
+
"grad_norm": 1.0528196096420288,
|
| 328 |
+
"learning_rate": 1.7310892586989413e-05,
|
| 329 |
+
"loss": 0.03988344669342041,
|
| 330 |
+
"step": 1050
|
| 331 |
+
},
|
| 332 |
+
{
|
| 333 |
+
"epoch": 0.19107714184145042,
|
| 334 |
+
"grad_norm": 1.5726221799850464,
|
| 335 |
+
"learning_rate": 1.7216338880484115e-05,
|
| 336 |
+
"loss": 0.044674863815307615,
|
| 337 |
+
"step": 1075
|
| 338 |
+
},
|
| 339 |
+
{
|
| 340 |
+
"epoch": 0.1955207963028795,
|
| 341 |
+
"grad_norm": 1.551660418510437,
|
| 342 |
+
"learning_rate": 1.712178517397882e-05,
|
| 343 |
+
"loss": 0.040711288452148435,
|
| 344 |
+
"step": 1100
|
| 345 |
+
},
|
| 346 |
+
{
|
| 347 |
+
"epoch": 0.19996445076430858,
|
| 348 |
+
"grad_norm": 1.090385913848877,
|
| 349 |
+
"learning_rate": 1.7027231467473526e-05,
|
| 350 |
+
"loss": 0.037872114181518556,
|
| 351 |
+
"step": 1125
|
| 352 |
+
},
|
| 353 |
+
{
|
| 354 |
+
"epoch": 0.20440810522573766,
|
| 355 |
+
"grad_norm": 1.1778202056884766,
|
| 356 |
+
"learning_rate": 1.6932677760968232e-05,
|
| 357 |
+
"loss": 0.039130420684814454,
|
| 358 |
+
"step": 1150
|
| 359 |
+
},
|
| 360 |
+
{
|
| 361 |
+
"epoch": 0.20885175968716674,
|
| 362 |
+
"grad_norm": 1.402064323425293,
|
| 363 |
+
"learning_rate": 1.6838124054462937e-05,
|
| 364 |
+
"loss": 0.03875999212265015,
|
| 365 |
+
"step": 1175
|
| 366 |
+
},
|
| 367 |
+
{
|
| 368 |
+
"epoch": 0.21329541414859582,
|
| 369 |
+
"grad_norm": 0.7979677319526672,
|
| 370 |
+
"learning_rate": 1.674357034795764e-05,
|
| 371 |
+
"loss": 0.033257806301116945,
|
| 372 |
+
"step": 1200
|
| 373 |
+
},
|
| 374 |
+
{
|
| 375 |
+
"epoch": 0.21773906861002487,
|
| 376 |
+
"grad_norm": 2.5630085468292236,
|
| 377 |
+
"learning_rate": 1.664901664145235e-05,
|
| 378 |
+
"loss": 0.03850275993347168,
|
| 379 |
+
"step": 1225
|
| 380 |
+
},
|
| 381 |
+
{
|
| 382 |
+
"epoch": 0.22218272307145395,
|
| 383 |
+
"grad_norm": 1.1255887746810913,
|
| 384 |
+
"learning_rate": 1.655446293494705e-05,
|
| 385 |
+
"loss": 0.035763952732086185,
|
| 386 |
+
"step": 1250
|
| 387 |
+
},
|
| 388 |
+
{
|
| 389 |
+
"epoch": 0.22662637753288303,
|
| 390 |
+
"grad_norm": 2.648975133895874,
|
| 391 |
+
"learning_rate": 1.6459909228441756e-05,
|
| 392 |
+
"loss": 0.04280531883239746,
|
| 393 |
+
"step": 1275
|
| 394 |
+
},
|
| 395 |
+
{
|
| 396 |
+
"epoch": 0.23107003199431211,
|
| 397 |
+
"grad_norm": 1.7409067153930664,
|
| 398 |
+
"learning_rate": 1.6365355521936462e-05,
|
| 399 |
+
"loss": 0.04235891819000244,
|
| 400 |
+
"step": 1300
|
| 401 |
+
},
|
| 402 |
+
{
|
| 403 |
+
"epoch": 0.2355136864557412,
|
| 404 |
+
"grad_norm": 1.5755038261413574,
|
| 405 |
+
"learning_rate": 1.6270801815431164e-05,
|
| 406 |
+
"loss": 0.04084760665893555,
|
| 407 |
+
"step": 1325
|
| 408 |
+
},
|
| 409 |
+
{
|
| 410 |
+
"epoch": 0.23995734091717028,
|
| 411 |
+
"grad_norm": 1.2442480325698853,
|
| 412 |
+
"learning_rate": 1.6176248108925873e-05,
|
| 413 |
+
"loss": 0.03942857027053833,
|
| 414 |
+
"step": 1350
|
| 415 |
+
},
|
| 416 |
+
{
|
| 417 |
+
"epoch": 0.24440099537859936,
|
| 418 |
+
"grad_norm": 0.7775816321372986,
|
| 419 |
+
"learning_rate": 1.6081694402420575e-05,
|
| 420 |
+
"loss": 0.039197320938110354,
|
| 421 |
+
"step": 1375
|
| 422 |
+
},
|
| 423 |
+
{
|
| 424 |
+
"epoch": 0.24884464984002844,
|
| 425 |
+
"grad_norm": 0.44854021072387695,
|
| 426 |
+
"learning_rate": 1.598714069591528e-05,
|
| 427 |
+
"loss": 0.030667483806610107,
|
| 428 |
+
"step": 1400
|
| 429 |
+
},
|
| 430 |
+
{
|
| 431 |
+
"epoch": 0.25328830430145755,
|
| 432 |
+
"grad_norm": 0.9631138443946838,
|
| 433 |
+
"learning_rate": 1.5892586989409986e-05,
|
| 434 |
+
"loss": 0.03460927009582519,
|
| 435 |
+
"step": 1425
|
| 436 |
+
},
|
| 437 |
+
{
|
| 438 |
+
"epoch": 0.25773195876288657,
|
| 439 |
+
"grad_norm": 0.8312052488327026,
|
| 440 |
+
"learning_rate": 1.5798033282904692e-05,
|
| 441 |
+
"loss": 0.03320029735565186,
|
| 442 |
+
"step": 1450
|
| 443 |
+
},
|
| 444 |
+
{
|
| 445 |
+
"epoch": 0.26217561322431565,
|
| 446 |
+
"grad_norm": 1.1160472631454468,
|
| 447 |
+
"learning_rate": 1.5703479576399397e-05,
|
| 448 |
+
"loss": 0.03198946952819824,
|
| 449 |
+
"step": 1475
|
| 450 |
+
},
|
| 451 |
+
{
|
| 452 |
+
"epoch": 0.26661926768574473,
|
| 453 |
+
"grad_norm": 1.6029430627822876,
|
| 454 |
+
"learning_rate": 1.56089258698941e-05,
|
| 455 |
+
"loss": 0.034000282287597654,
|
| 456 |
+
"step": 1500
|
| 457 |
+
},
|
| 458 |
+
{
|
| 459 |
+
"epoch": 0.26661926768574473,
|
| 460 |
+
"eval_accuracy": 0.8785,
|
| 461 |
+
"eval_auroc": 0.9909046725682125,
|
| 462 |
+
"eval_f1": 0.8921438082556591,
|
| 463 |
+
"eval_loss": 0.04817873612046242,
|
| 464 |
+
"eval_runtime": 44.1179,
|
| 465 |
+
"eval_samples_per_second": 45.333,
|
| 466 |
+
"eval_steps_per_second": 1.428,
|
| 467 |
+
"eval_tpr_at_fpr1": 0.8021868787276342,
|
| 468 |
+
"eval_tpr_at_fpr5": 0.9572564612326043,
|
| 469 |
+
"step": 1500
|
| 470 |
+
},
|
| 471 |
+
{
|
| 472 |
+
"epoch": 0.2710629221471738,
|
| 473 |
+
"grad_norm": 2.1138088703155518,
|
| 474 |
+
"learning_rate": 1.5514372163388805e-05,
|
| 475 |
+
"loss": 0.03080030918121338,
|
| 476 |
+
"step": 1525
|
| 477 |
+
},
|
| 478 |
+
{
|
| 479 |
+
"epoch": 0.2755065766086029,
|
| 480 |
+
"grad_norm": 2.206002950668335,
|
| 481 |
+
"learning_rate": 1.541981845688351e-05,
|
| 482 |
+
"loss": 0.04029146194458008,
|
| 483 |
+
"step": 1550
|
| 484 |
+
},
|
| 485 |
+
{
|
| 486 |
+
"epoch": 0.279950231070032,
|
| 487 |
+
"grad_norm": 1.8083148002624512,
|
| 488 |
+
"learning_rate": 1.5325264750378216e-05,
|
| 489 |
+
"loss": 0.028056590557098388,
|
| 490 |
+
"step": 1575
|
| 491 |
+
},
|
| 492 |
+
{
|
| 493 |
+
"epoch": 0.28439388553146105,
|
| 494 |
+
"grad_norm": 0.9714005589485168,
|
| 495 |
+
"learning_rate": 1.5230711043872922e-05,
|
| 496 |
+
"loss": 0.033678176403045657,
|
| 497 |
+
"step": 1600
|
| 498 |
+
},
|
| 499 |
+
{
|
| 500 |
+
"epoch": 0.28883753999289014,
|
| 501 |
+
"grad_norm": 0.29741403460502625,
|
| 502 |
+
"learning_rate": 1.5136157337367626e-05,
|
| 503 |
+
"loss": 0.03180084943771362,
|
| 504 |
+
"step": 1625
|
| 505 |
+
},
|
| 506 |
+
{
|
| 507 |
+
"epoch": 0.2932811944543192,
|
| 508 |
+
"grad_norm": 0.516327440738678,
|
| 509 |
+
"learning_rate": 1.5041603630862331e-05,
|
| 510 |
+
"loss": 0.03431586980819702,
|
| 511 |
+
"step": 1650
|
| 512 |
+
},
|
| 513 |
+
{
|
| 514 |
+
"epoch": 0.2977248489157483,
|
| 515 |
+
"grad_norm": 0.7245854735374451,
|
| 516 |
+
"learning_rate": 1.4947049924357035e-05,
|
| 517 |
+
"loss": 0.03058172941207886,
|
| 518 |
+
"step": 1675
|
| 519 |
+
},
|
| 520 |
+
{
|
| 521 |
+
"epoch": 0.3021685033771774,
|
| 522 |
+
"grad_norm": 2.4247725009918213,
|
| 523 |
+
"learning_rate": 1.4852496217851742e-05,
|
| 524 |
+
"loss": 0.04614161014556885,
|
| 525 |
+
"step": 1700
|
| 526 |
+
},
|
| 527 |
+
{
|
| 528 |
+
"epoch": 0.30661215783860646,
|
| 529 |
+
"grad_norm": 1.5023690462112427,
|
| 530 |
+
"learning_rate": 1.4757942511346446e-05,
|
| 531 |
+
"loss": 0.03227074861526489,
|
| 532 |
+
"step": 1725
|
| 533 |
+
},
|
| 534 |
+
{
|
| 535 |
+
"epoch": 0.31105581230003554,
|
| 536 |
+
"grad_norm": 0.5162255764007568,
|
| 537 |
+
"learning_rate": 1.466338880484115e-05,
|
| 538 |
+
"loss": 0.037947914600372314,
|
| 539 |
+
"step": 1750
|
| 540 |
+
},
|
| 541 |
+
{
|
| 542 |
+
"epoch": 0.3154994667614646,
|
| 543 |
+
"grad_norm": 0.8927256464958191,
|
| 544 |
+
"learning_rate": 1.4568835098335856e-05,
|
| 545 |
+
"loss": 0.04013613700866699,
|
| 546 |
+
"step": 1775
|
| 547 |
+
},
|
| 548 |
+
{
|
| 549 |
+
"epoch": 0.3199431212228937,
|
| 550 |
+
"grad_norm": 1.9027554988861084,
|
| 551 |
+
"learning_rate": 1.447428139183056e-05,
|
| 552 |
+
"loss": 0.04084087371826172,
|
| 553 |
+
"step": 1800
|
| 554 |
+
},
|
| 555 |
+
{
|
| 556 |
+
"epoch": 0.3243867756843228,
|
| 557 |
+
"grad_norm": 1.0974030494689941,
|
| 558 |
+
"learning_rate": 1.4379727685325267e-05,
|
| 559 |
+
"loss": 0.02415942192077637,
|
| 560 |
+
"step": 1825
|
| 561 |
+
},
|
| 562 |
+
{
|
| 563 |
+
"epoch": 0.32883043014575186,
|
| 564 |
+
"grad_norm": 1.2848249673843384,
|
| 565 |
+
"learning_rate": 1.428517397881997e-05,
|
| 566 |
+
"loss": 0.03221212863922119,
|
| 567 |
+
"step": 1850
|
| 568 |
+
},
|
| 569 |
+
{
|
| 570 |
+
"epoch": 0.33327408460718094,
|
| 571 |
+
"grad_norm": 0.8059474229812622,
|
| 572 |
+
"learning_rate": 1.4190620272314676e-05,
|
| 573 |
+
"loss": 0.037272207736968994,
|
| 574 |
+
"step": 1875
|
| 575 |
+
},
|
| 576 |
+
{
|
| 577 |
+
"epoch": 0.33771773906861,
|
| 578 |
+
"grad_norm": 1.0132513046264648,
|
| 579 |
+
"learning_rate": 1.4096066565809382e-05,
|
| 580 |
+
"loss": 0.029253509044647217,
|
| 581 |
+
"step": 1900
|
| 582 |
+
},
|
| 583 |
+
{
|
| 584 |
+
"epoch": 0.3421613935300391,
|
| 585 |
+
"grad_norm": 0.7545719742774963,
|
| 586 |
+
"learning_rate": 1.4001512859304086e-05,
|
| 587 |
+
"loss": 0.03997385501861572,
|
| 588 |
+
"step": 1925
|
| 589 |
+
},
|
| 590 |
+
{
|
| 591 |
+
"epoch": 0.3466050479914682,
|
| 592 |
+
"grad_norm": 0.37751272320747375,
|
| 593 |
+
"learning_rate": 1.3906959152798791e-05,
|
| 594 |
+
"loss": 0.027481729984283446,
|
| 595 |
+
"step": 1950
|
| 596 |
+
},
|
| 597 |
+
{
|
| 598 |
+
"epoch": 0.35104870245289727,
|
| 599 |
+
"grad_norm": 1.522934079170227,
|
| 600 |
+
"learning_rate": 1.3812405446293495e-05,
|
| 601 |
+
"loss": 0.028633484840393065,
|
| 602 |
+
"step": 1975
|
| 603 |
+
},
|
| 604 |
+
{
|
| 605 |
+
"epoch": 0.35549235691432635,
|
| 606 |
+
"grad_norm": 1.2354328632354736,
|
| 607 |
+
"learning_rate": 1.3717851739788202e-05,
|
| 608 |
+
"loss": 0.022525691986083986,
|
| 609 |
+
"step": 2000
|
| 610 |
+
},
|
| 611 |
+
{
|
| 612 |
+
"epoch": 0.35549235691432635,
|
| 613 |
+
"eval_accuracy": 0.936,
|
| 614 |
+
"eval_auroc": 0.9948458144493202,
|
| 615 |
+
"eval_f1": 0.9399624765478424,
|
| 616 |
+
"eval_loss": 0.030829520896077156,
|
| 617 |
+
"eval_runtime": 40.0972,
|
| 618 |
+
"eval_samples_per_second": 49.879,
|
| 619 |
+
"eval_steps_per_second": 1.571,
|
| 620 |
+
"eval_tpr_at_fpr1": 0.9125248508946322,
|
| 621 |
+
"eval_tpr_at_fpr5": 0.9781312127236581,
|
| 622 |
+
"step": 2000
|
| 623 |
+
},
|
| 624 |
+
{
|
| 625 |
+
"epoch": 0.3599360113757554,
|
| 626 |
+
"grad_norm": 1.0933098793029785,
|
| 627 |
+
"learning_rate": 1.3623298033282906e-05,
|
| 628 |
+
"loss": 0.03529852867126465,
|
| 629 |
+
"step": 2025
|
| 630 |
+
},
|
| 631 |
+
{
|
| 632 |
+
"epoch": 0.3643796658371845,
|
| 633 |
+
"grad_norm": 2.0476551055908203,
|
| 634 |
+
"learning_rate": 1.352874432677761e-05,
|
| 635 |
+
"loss": 0.03386972665786743,
|
| 636 |
+
"step": 2050
|
| 637 |
+
},
|
| 638 |
+
{
|
| 639 |
+
"epoch": 0.3688233202986136,
|
| 640 |
+
"grad_norm": 0.9298884868621826,
|
| 641 |
+
"learning_rate": 1.3434190620272315e-05,
|
| 642 |
+
"loss": 0.03486936330795288,
|
| 643 |
+
"step": 2075
|
| 644 |
+
},
|
| 645 |
+
{
|
| 646 |
+
"epoch": 0.37326697476004267,
|
| 647 |
+
"grad_norm": 0.7512989044189453,
|
| 648 |
+
"learning_rate": 1.333963691376702e-05,
|
| 649 |
+
"loss": 0.0267183780670166,
|
| 650 |
+
"step": 2100
|
| 651 |
+
},
|
| 652 |
+
{
|
| 653 |
+
"epoch": 0.37771062922147175,
|
| 654 |
+
"grad_norm": 1.4821196794509888,
|
| 655 |
+
"learning_rate": 1.3245083207261727e-05,
|
| 656 |
+
"loss": 0.021263403892517088,
|
| 657 |
+
"step": 2125
|
| 658 |
+
},
|
| 659 |
+
{
|
| 660 |
+
"epoch": 0.38215428368290083,
|
| 661 |
+
"grad_norm": 0.8745072484016418,
|
| 662 |
+
"learning_rate": 1.315052950075643e-05,
|
| 663 |
+
"loss": 0.0272180438041687,
|
| 664 |
+
"step": 2150
|
| 665 |
+
},
|
| 666 |
+
{
|
| 667 |
+
"epoch": 0.3865979381443299,
|
| 668 |
+
"grad_norm": 1.6741865873336792,
|
| 669 |
+
"learning_rate": 1.3055975794251136e-05,
|
| 670 |
+
"loss": 0.026980955600738526,
|
| 671 |
+
"step": 2175
|
| 672 |
+
},
|
| 673 |
+
{
|
| 674 |
+
"epoch": 0.391041592605759,
|
| 675 |
+
"grad_norm": 0.8200652599334717,
|
| 676 |
+
"learning_rate": 1.2961422087745842e-05,
|
| 677 |
+
"loss": 0.027142252922058106,
|
| 678 |
+
"step": 2200
|
| 679 |
+
},
|
| 680 |
+
{
|
| 681 |
+
"epoch": 0.3954852470671881,
|
| 682 |
+
"grad_norm": 1.5616494417190552,
|
| 683 |
+
"learning_rate": 1.2866868381240545e-05,
|
| 684 |
+
"loss": 0.030536642074584962,
|
| 685 |
+
"step": 2225
|
| 686 |
+
},
|
| 687 |
+
{
|
| 688 |
+
"epoch": 0.39992890152861715,
|
| 689 |
+
"grad_norm": 0.7505294680595398,
|
| 690 |
+
"learning_rate": 1.2772314674735251e-05,
|
| 691 |
+
"loss": 0.03007654905319214,
|
| 692 |
+
"step": 2250
|
| 693 |
+
},
|
| 694 |
+
{
|
| 695 |
+
"epoch": 0.40437255599004623,
|
| 696 |
+
"grad_norm": 0.3857294023036957,
|
| 697 |
+
"learning_rate": 1.2677760968229955e-05,
|
| 698 |
+
"loss": 0.02641111135482788,
|
| 699 |
+
"step": 2275
|
| 700 |
+
},
|
| 701 |
+
{
|
| 702 |
+
"epoch": 0.4088162104514753,
|
| 703 |
+
"grad_norm": 0.9879816174507141,
|
| 704 |
+
"learning_rate": 1.2583207261724662e-05,
|
| 705 |
+
"loss": 0.027107694149017335,
|
| 706 |
+
"step": 2300
|
| 707 |
+
},
|
| 708 |
+
{
|
| 709 |
+
"epoch": 0.4132598649129044,
|
| 710 |
+
"grad_norm": 0.5398420095443726,
|
| 711 |
+
"learning_rate": 1.2488653555219366e-05,
|
| 712 |
+
"loss": 0.027692139148712158,
|
| 713 |
+
"step": 2325
|
| 714 |
+
},
|
| 715 |
+
{
|
| 716 |
+
"epoch": 0.4177035193743335,
|
| 717 |
+
"grad_norm": 0.8365870118141174,
|
| 718 |
+
"learning_rate": 1.239409984871407e-05,
|
| 719 |
+
"loss": 0.029139807224273683,
|
| 720 |
+
"step": 2350
|
| 721 |
+
},
|
| 722 |
+
{
|
| 723 |
+
"epoch": 0.42214717383576256,
|
| 724 |
+
"grad_norm": 1.1654356718063354,
|
| 725 |
+
"learning_rate": 1.2299546142208775e-05,
|
| 726 |
+
"loss": 0.025624027252197267,
|
| 727 |
+
"step": 2375
|
| 728 |
+
},
|
| 729 |
+
{
|
| 730 |
+
"epoch": 0.42659082829719164,
|
| 731 |
+
"grad_norm": 0.8927724361419678,
|
| 732 |
+
"learning_rate": 1.220499243570348e-05,
|
| 733 |
+
"loss": 0.032838408946990964,
|
| 734 |
+
"step": 2400
|
| 735 |
+
},
|
| 736 |
+
{
|
| 737 |
+
"epoch": 0.43103448275862066,
|
| 738 |
+
"grad_norm": 1.7535399198532104,
|
| 739 |
+
"learning_rate": 1.2110438729198187e-05,
|
| 740 |
+
"loss": 0.03213581085205078,
|
| 741 |
+
"step": 2425
|
| 742 |
+
},
|
| 743 |
+
{
|
| 744 |
+
"epoch": 0.43547813722004974,
|
| 745 |
+
"grad_norm": 1.506422996520996,
|
| 746 |
+
"learning_rate": 1.201588502269289e-05,
|
| 747 |
+
"loss": 0.031965067386627195,
|
| 748 |
+
"step": 2450
|
| 749 |
+
},
|
| 750 |
+
{
|
| 751 |
+
"epoch": 0.4399217916814788,
|
| 752 |
+
"grad_norm": 1.933950424194336,
|
| 753 |
+
"learning_rate": 1.1921331316187596e-05,
|
| 754 |
+
"loss": 0.02685023784637451,
|
| 755 |
+
"step": 2475
|
| 756 |
+
},
|
| 757 |
+
{
|
| 758 |
+
"epoch": 0.4443654461429079,
|
| 759 |
+
"grad_norm": 0.8511770963668823,
|
| 760 |
+
"learning_rate": 1.18267776096823e-05,
|
| 761 |
+
"loss": 0.03357476472854614,
|
| 762 |
+
"step": 2500
|
| 763 |
+
},
|
| 764 |
+
{
|
| 765 |
+
"epoch": 0.4443654461429079,
|
| 766 |
+
"eval_accuracy": 0.9095,
|
| 767 |
+
"eval_auroc": 0.9952128276617958,
|
| 768 |
+
"eval_f1": 0.9173893199452305,
|
| 769 |
+
"eval_loss": 0.030984506011009216,
|
| 770 |
+
"eval_runtime": 40.8627,
|
| 771 |
+
"eval_samples_per_second": 48.944,
|
| 772 |
+
"eval_steps_per_second": 1.542,
|
| 773 |
+
"eval_tpr_at_fpr1": 0.856858846918489,
|
| 774 |
+
"eval_tpr_at_fpr5": 0.9821073558648111,
|
| 775 |
+
"step": 2500
|
| 776 |
+
},
|
| 777 |
+
{
|
| 778 |
+
"epoch": 0.448809100604337,
|
| 779 |
+
"grad_norm": 1.3411929607391357,
|
| 780 |
+
"learning_rate": 1.1732223903177005e-05,
|
| 781 |
+
"loss": 0.026271984577178956,
|
| 782 |
+
"step": 2525
|
| 783 |
+
},
|
| 784 |
+
{
|
| 785 |
+
"epoch": 0.45325275506576607,
|
| 786 |
+
"grad_norm": 1.3650128841400146,
|
| 787 |
+
"learning_rate": 1.1637670196671711e-05,
|
| 788 |
+
"loss": 0.030547237396240233,
|
| 789 |
+
"step": 2550
|
| 790 |
+
},
|
| 791 |
+
{
|
| 792 |
+
"epoch": 0.45769640952719515,
|
| 793 |
+
"grad_norm": 0.7035048007965088,
|
| 794 |
+
"learning_rate": 1.1543116490166415e-05,
|
| 795 |
+
"loss": 0.026542000770568848,
|
| 796 |
+
"step": 2575
|
| 797 |
+
},
|
| 798 |
+
{
|
| 799 |
+
"epoch": 0.46214006398862423,
|
| 800 |
+
"grad_norm": 1.3388855457305908,
|
| 801 |
+
"learning_rate": 1.1448562783661122e-05,
|
| 802 |
+
"loss": 0.024521036148071287,
|
| 803 |
+
"step": 2600
|
| 804 |
+
},
|
| 805 |
+
{
|
| 806 |
+
"epoch": 0.4665837184500533,
|
| 807 |
+
"grad_norm": 1.0085132122039795,
|
| 808 |
+
"learning_rate": 1.1354009077155826e-05,
|
| 809 |
+
"loss": 0.024952406883239745,
|
| 810 |
+
"step": 2625
|
| 811 |
+
},
|
| 812 |
+
{
|
| 813 |
+
"epoch": 0.4710273729114824,
|
| 814 |
+
"grad_norm": 0.30464261770248413,
|
| 815 |
+
"learning_rate": 1.125945537065053e-05,
|
| 816 |
+
"loss": 0.02288907766342163,
|
| 817 |
+
"step": 2650
|
| 818 |
+
},
|
| 819 |
+
{
|
| 820 |
+
"epoch": 0.47547102737291147,
|
| 821 |
+
"grad_norm": 0.6784248948097229,
|
| 822 |
+
"learning_rate": 1.1164901664145235e-05,
|
| 823 |
+
"loss": 0.024585678577423095,
|
| 824 |
+
"step": 2675
|
| 825 |
+
},
|
| 826 |
+
{
|
| 827 |
+
"epoch": 0.47991468183434055,
|
| 828 |
+
"grad_norm": 1.2737281322479248,
|
| 829 |
+
"learning_rate": 1.107034795763994e-05,
|
| 830 |
+
"loss": 0.028601126670837404,
|
| 831 |
+
"step": 2700
|
| 832 |
+
},
|
| 833 |
+
{
|
| 834 |
+
"epoch": 0.48435833629576963,
|
| 835 |
+
"grad_norm": 1.2060391902923584,
|
| 836 |
+
"learning_rate": 1.0975794251134646e-05,
|
| 837 |
+
"loss": 0.03009690284729004,
|
| 838 |
+
"step": 2725
|
| 839 |
+
},
|
| 840 |
+
{
|
| 841 |
+
"epoch": 0.4888019907571987,
|
| 842 |
+
"grad_norm": 0.9331129789352417,
|
| 843 |
+
"learning_rate": 1.088124054462935e-05,
|
| 844 |
+
"loss": 0.024897255897521973,
|
| 845 |
+
"step": 2750
|
| 846 |
+
},
|
| 847 |
+
{
|
| 848 |
+
"epoch": 0.4932456452186278,
|
| 849 |
+
"grad_norm": 0.7035834789276123,
|
| 850 |
+
"learning_rate": 1.0786686838124056e-05,
|
| 851 |
+
"loss": 0.029437661170959473,
|
| 852 |
+
"step": 2775
|
| 853 |
+
},
|
| 854 |
+
{
|
| 855 |
+
"epoch": 0.4976892996800569,
|
| 856 |
+
"grad_norm": 1.3447843790054321,
|
| 857 |
+
"learning_rate": 1.069213313161876e-05,
|
| 858 |
+
"loss": 0.024274458885192873,
|
| 859 |
+
"step": 2800
|
| 860 |
+
},
|
| 861 |
+
{
|
| 862 |
+
"epoch": 0.502132954141486,
|
| 863 |
+
"grad_norm": 0.7223392724990845,
|
| 864 |
+
"learning_rate": 1.0597579425113464e-05,
|
| 865 |
+
"loss": 0.029445352554321288,
|
| 866 |
+
"step": 2825
|
| 867 |
+
},
|
| 868 |
+
{
|
| 869 |
+
"epoch": 0.5065766086029151,
|
| 870 |
+
"grad_norm": 1.4334781169891357,
|
| 871 |
+
"learning_rate": 1.0503025718608171e-05,
|
| 872 |
+
"loss": 0.0277593731880188,
|
| 873 |
+
"step": 2850
|
| 874 |
+
},
|
| 875 |
+
{
|
| 876 |
+
"epoch": 0.5110202630643441,
|
| 877 |
+
"grad_norm": 1.4802097082138062,
|
| 878 |
+
"learning_rate": 1.0408472012102875e-05,
|
| 879 |
+
"loss": 0.029638910293579103,
|
| 880 |
+
"step": 2875
|
| 881 |
+
},
|
| 882 |
+
{
|
| 883 |
+
"epoch": 0.5154639175257731,
|
| 884 |
+
"grad_norm": 1.0358779430389404,
|
| 885 |
+
"learning_rate": 1.031391830559758e-05,
|
| 886 |
+
"loss": 0.021964311599731445,
|
| 887 |
+
"step": 2900
|
| 888 |
+
},
|
| 889 |
+
{
|
| 890 |
+
"epoch": 0.5199075719872023,
|
| 891 |
+
"grad_norm": 0.5717483758926392,
|
| 892 |
+
"learning_rate": 1.0219364599092286e-05,
|
| 893 |
+
"loss": 0.02695397138595581,
|
| 894 |
+
"step": 2925
|
| 895 |
+
},
|
| 896 |
+
{
|
| 897 |
+
"epoch": 0.5243512264486313,
|
| 898 |
+
"grad_norm": 0.5034074783325195,
|
| 899 |
+
"learning_rate": 1.012481089258699e-05,
|
| 900 |
+
"loss": 0.02091419219970703,
|
| 901 |
+
"step": 2950
|
| 902 |
+
},
|
| 903 |
+
{
|
| 904 |
+
"epoch": 0.5287948809100604,
|
| 905 |
+
"grad_norm": 1.093700647354126,
|
| 906 |
+
"learning_rate": 1.0030257186081695e-05,
|
| 907 |
+
"loss": 0.018702698945999144,
|
| 908 |
+
"step": 2975
|
| 909 |
+
},
|
| 910 |
+
{
|
| 911 |
+
"epoch": 0.5332385353714895,
|
| 912 |
+
"grad_norm": 0.5766699910163879,
|
| 913 |
+
"learning_rate": 9.935703479576401e-06,
|
| 914 |
+
"loss": 0.02352435827255249,
|
| 915 |
+
"step": 3000
|
| 916 |
+
},
|
| 917 |
+
{
|
| 918 |
+
"epoch": 0.5332385353714895,
|
| 919 |
+
"eval_accuracy": 0.903,
|
| 920 |
+
"eval_auroc": 0.992703737334544,
|
| 921 |
+
"eval_f1": 0.9120580235720762,
|
| 922 |
+
"eval_loss": 0.036512941122055054,
|
| 923 |
+
"eval_runtime": 40.4309,
|
| 924 |
+
"eval_samples_per_second": 49.467,
|
| 925 |
+
"eval_steps_per_second": 1.558,
|
| 926 |
+
"eval_tpr_at_fpr1": 0.852882703777336,
|
| 927 |
+
"eval_tpr_at_fpr5": 0.9582504970178927,
|
| 928 |
+
"step": 3000
|
| 929 |
+
},
|
| 930 |
+
{
|
| 931 |
+
"epoch": 0.5376821898329186,
|
| 932 |
+
"grad_norm": 1.6101889610290527,
|
| 933 |
+
"learning_rate": 9.841149773071105e-06,
|
| 934 |
+
"loss": 0.02831456422805786,
|
| 935 |
+
"step": 3025
|
| 936 |
+
},
|
| 937 |
+
{
|
| 938 |
+
"epoch": 0.5421258442943476,
|
| 939 |
+
"grad_norm": 1.7061760425567627,
|
| 940 |
+
"learning_rate": 9.74659606656581e-06,
|
| 941 |
+
"loss": 0.023692820072174072,
|
| 942 |
+
"step": 3050
|
| 943 |
+
},
|
| 944 |
+
{
|
| 945 |
+
"epoch": 0.5465694987557768,
|
| 946 |
+
"grad_norm": 1.293489933013916,
|
| 947 |
+
"learning_rate": 9.652042360060516e-06,
|
| 948 |
+
"loss": 0.021458499431610108,
|
| 949 |
+
"step": 3075
|
| 950 |
+
},
|
| 951 |
+
{
|
| 952 |
+
"epoch": 0.5510131532172058,
|
| 953 |
+
"grad_norm": 1.280171513557434,
|
| 954 |
+
"learning_rate": 9.55748865355522e-06,
|
| 955 |
+
"loss": 0.023303213119506835,
|
| 956 |
+
"step": 3100
|
| 957 |
+
},
|
| 958 |
+
{
|
| 959 |
+
"epoch": 0.5554568076786349,
|
| 960 |
+
"grad_norm": 1.2874751091003418,
|
| 961 |
+
"learning_rate": 9.462934947049925e-06,
|
| 962 |
+
"loss": 0.027433459758758546,
|
| 963 |
+
"step": 3125
|
| 964 |
+
},
|
| 965 |
+
{
|
| 966 |
+
"epoch": 0.559900462140064,
|
| 967 |
+
"grad_norm": 1.2265180349349976,
|
| 968 |
+
"learning_rate": 9.36838124054463e-06,
|
| 969 |
+
"loss": 0.02306551456451416,
|
| 970 |
+
"step": 3150
|
| 971 |
+
},
|
| 972 |
+
{
|
| 973 |
+
"epoch": 0.5643441166014931,
|
| 974 |
+
"grad_norm": 2.207395076751709,
|
| 975 |
+
"learning_rate": 9.273827534039335e-06,
|
| 976 |
+
"loss": 0.030330984592437743,
|
| 977 |
+
"step": 3175
|
| 978 |
+
},
|
| 979 |
+
{
|
| 980 |
+
"epoch": 0.5687877710629221,
|
| 981 |
+
"grad_norm": 0.700985312461853,
|
| 982 |
+
"learning_rate": 9.17927382753404e-06,
|
| 983 |
+
"loss": 0.02533963918685913,
|
| 984 |
+
"step": 3200
|
| 985 |
+
},
|
| 986 |
+
{
|
| 987 |
+
"epoch": 0.5732314255243512,
|
| 988 |
+
"grad_norm": 0.8443852663040161,
|
| 989 |
+
"learning_rate": 9.084720121028746e-06,
|
| 990 |
+
"loss": 0.029710006713867188,
|
| 991 |
+
"step": 3225
|
| 992 |
+
},
|
| 993 |
+
{
|
| 994 |
+
"epoch": 0.5776750799857803,
|
| 995 |
+
"grad_norm": 0.5237564444541931,
|
| 996 |
+
"learning_rate": 8.99016641452345e-06,
|
| 997 |
+
"loss": 0.02827130079269409,
|
| 998 |
+
"step": 3250
|
| 999 |
+
},
|
| 1000 |
+
{
|
| 1001 |
+
"epoch": 0.5821187344472094,
|
| 1002 |
+
"grad_norm": 1.318710446357727,
|
| 1003 |
+
"learning_rate": 8.895612708018155e-06,
|
| 1004 |
+
"loss": 0.017649848461151123,
|
| 1005 |
+
"step": 3275
|
| 1006 |
+
},
|
| 1007 |
+
{
|
| 1008 |
+
"epoch": 0.5865623889086384,
|
| 1009 |
+
"grad_norm": 2.1418726444244385,
|
| 1010 |
+
"learning_rate": 8.80105900151286e-06,
|
| 1011 |
+
"loss": 0.028039700984954834,
|
| 1012 |
+
"step": 3300
|
| 1013 |
+
},
|
| 1014 |
+
{
|
| 1015 |
+
"epoch": 0.5910060433700676,
|
| 1016 |
+
"grad_norm": 0.6394239068031311,
|
| 1017 |
+
"learning_rate": 8.706505295007565e-06,
|
| 1018 |
+
"loss": 0.029724645614624023,
|
| 1019 |
+
"step": 3325
|
| 1020 |
+
},
|
| 1021 |
+
{
|
| 1022 |
+
"epoch": 0.5954496978314966,
|
| 1023 |
+
"grad_norm": 0.44896772503852844,
|
| 1024 |
+
"learning_rate": 8.61195158850227e-06,
|
| 1025 |
+
"loss": 0.026093797683715822,
|
| 1026 |
+
"step": 3350
|
| 1027 |
+
},
|
| 1028 |
+
{
|
| 1029 |
+
"epoch": 0.5998933522929257,
|
| 1030 |
+
"grad_norm": 2.3762757778167725,
|
| 1031 |
+
"learning_rate": 8.517397881996974e-06,
|
| 1032 |
+
"loss": 0.027712843418121337,
|
| 1033 |
+
"step": 3375
|
| 1034 |
+
},
|
| 1035 |
+
{
|
| 1036 |
+
"epoch": 0.6043370067543548,
|
| 1037 |
+
"grad_norm": 1.4584051370620728,
|
| 1038 |
+
"learning_rate": 8.42284417549168e-06,
|
| 1039 |
+
"loss": 0.031196737289428712,
|
| 1040 |
+
"step": 3400
|
| 1041 |
+
},
|
| 1042 |
+
{
|
| 1043 |
+
"epoch": 0.6087806612157839,
|
| 1044 |
+
"grad_norm": 2.2569475173950195,
|
| 1045 |
+
"learning_rate": 8.328290468986385e-06,
|
| 1046 |
+
"loss": 0.026621932983398437,
|
| 1047 |
+
"step": 3425
|
| 1048 |
+
},
|
| 1049 |
+
{
|
| 1050 |
+
"epoch": 0.6132243156772129,
|
| 1051 |
+
"grad_norm": 1.9737194776535034,
|
| 1052 |
+
"learning_rate": 8.23373676248109e-06,
|
| 1053 |
+
"loss": 0.020163617134094237,
|
| 1054 |
+
"step": 3450
|
| 1055 |
+
},
|
| 1056 |
+
{
|
| 1057 |
+
"epoch": 0.617667970138642,
|
| 1058 |
+
"grad_norm": 0.9083975553512573,
|
| 1059 |
+
"learning_rate": 8.139183055975795e-06,
|
| 1060 |
+
"loss": 0.0209149169921875,
|
| 1061 |
+
"step": 3475
|
| 1062 |
+
},
|
| 1063 |
+
{
|
| 1064 |
+
"epoch": 0.6221116246000711,
|
| 1065 |
+
"grad_norm": 1.0563571453094482,
|
| 1066 |
+
"learning_rate": 8.0446293494705e-06,
|
| 1067 |
+
"loss": 0.025532805919647218,
|
| 1068 |
+
"step": 3500
|
| 1069 |
+
},
|
| 1070 |
+
{
|
| 1071 |
+
"epoch": 0.6221116246000711,
|
| 1072 |
+
"eval_accuracy": 0.8925,
|
| 1073 |
+
"eval_auroc": 0.9965268749674989,
|
| 1074 |
+
"eval_f1": 0.9033707865168539,
|
| 1075 |
+
"eval_loss": 0.029821457341313362,
|
| 1076 |
+
"eval_runtime": 40.7571,
|
| 1077 |
+
"eval_samples_per_second": 49.071,
|
| 1078 |
+
"eval_steps_per_second": 1.546,
|
| 1079 |
+
"eval_tpr_at_fpr1": 0.9254473161033797,
|
| 1080 |
+
"eval_tpr_at_fpr5": 0.9850894632206759,
|
| 1081 |
+
"step": 3500
|
| 1082 |
+
},
|
| 1083 |
+
{
|
| 1084 |
+
"epoch": 0.6265552790615002,
|
| 1085 |
+
"grad_norm": 1.6384830474853516,
|
| 1086 |
+
"learning_rate": 7.950075642965204e-06,
|
| 1087 |
+
"loss": 0.018657710552215576,
|
| 1088 |
+
"step": 3525
|
| 1089 |
+
},
|
| 1090 |
+
{
|
| 1091 |
+
"epoch": 0.6309989335229292,
|
| 1092 |
+
"grad_norm": 1.4129881858825684,
|
| 1093 |
+
"learning_rate": 7.85552193645991e-06,
|
| 1094 |
+
"loss": 0.027512576580047608,
|
| 1095 |
+
"step": 3550
|
| 1096 |
+
},
|
| 1097 |
+
{
|
| 1098 |
+
"epoch": 0.6354425879843584,
|
| 1099 |
+
"grad_norm": 1.2471665143966675,
|
| 1100 |
+
"learning_rate": 7.760968229954615e-06,
|
| 1101 |
+
"loss": 0.029382569789886473,
|
| 1102 |
+
"step": 3575
|
| 1103 |
+
},
|
| 1104 |
+
{
|
| 1105 |
+
"epoch": 0.6398862424457874,
|
| 1106 |
+
"grad_norm": 1.1254513263702393,
|
| 1107 |
+
"learning_rate": 7.66641452344932e-06,
|
| 1108 |
+
"loss": 0.023775274753570556,
|
| 1109 |
+
"step": 3600
|
| 1110 |
+
},
|
| 1111 |
+
{
|
| 1112 |
+
"epoch": 0.6443298969072165,
|
| 1113 |
+
"grad_norm": 0.9185925126075745,
|
| 1114 |
+
"learning_rate": 7.571860816944025e-06,
|
| 1115 |
+
"loss": 0.025625219345092775,
|
| 1116 |
+
"step": 3625
|
| 1117 |
+
},
|
| 1118 |
+
{
|
| 1119 |
+
"epoch": 0.6487735513686456,
|
| 1120 |
+
"grad_norm": 0.9741719961166382,
|
| 1121 |
+
"learning_rate": 7.477307110438729e-06,
|
| 1122 |
+
"loss": 0.014400173425674439,
|
| 1123 |
+
"step": 3650
|
| 1124 |
+
},
|
| 1125 |
+
{
|
| 1126 |
+
"epoch": 0.6532172058300747,
|
| 1127 |
+
"grad_norm": 1.5722410678863525,
|
| 1128 |
+
"learning_rate": 7.382753403933435e-06,
|
| 1129 |
+
"loss": 0.016961036920547484,
|
| 1130 |
+
"step": 3675
|
| 1131 |
+
},
|
| 1132 |
+
{
|
| 1133 |
+
"epoch": 0.6576608602915037,
|
| 1134 |
+
"grad_norm": 1.0956284999847412,
|
| 1135 |
+
"learning_rate": 7.28819969742814e-06,
|
| 1136 |
+
"loss": 0.029399728775024413,
|
| 1137 |
+
"step": 3700
|
| 1138 |
+
},
|
| 1139 |
+
{
|
| 1140 |
+
"epoch": 0.6621045147529329,
|
| 1141 |
+
"grad_norm": 1.8072013854980469,
|
| 1142 |
+
"learning_rate": 7.193645990922845e-06,
|
| 1143 |
+
"loss": 0.02603915214538574,
|
| 1144 |
+
"step": 3725
|
| 1145 |
+
},
|
| 1146 |
+
{
|
| 1147 |
+
"epoch": 0.6665481692143619,
|
| 1148 |
+
"grad_norm": 1.4998871088027954,
|
| 1149 |
+
"learning_rate": 7.09909228441755e-06,
|
| 1150 |
+
"loss": 0.018154734373092653,
|
| 1151 |
+
"step": 3750
|
| 1152 |
+
},
|
| 1153 |
+
{
|
| 1154 |
+
"epoch": 0.670991823675791,
|
| 1155 |
+
"grad_norm": 1.015345573425293,
|
| 1156 |
+
"learning_rate": 7.004538577912255e-06,
|
| 1157 |
+
"loss": 0.020612461566925047,
|
| 1158 |
+
"step": 3775
|
| 1159 |
+
},
|
| 1160 |
+
{
|
| 1161 |
+
"epoch": 0.67543547813722,
|
| 1162 |
+
"grad_norm": 0.518636167049408,
|
| 1163 |
+
"learning_rate": 6.909984871406959e-06,
|
| 1164 |
+
"loss": 0.020666675567626955,
|
| 1165 |
+
"step": 3800
|
| 1166 |
+
},
|
| 1167 |
+
{
|
| 1168 |
+
"epoch": 0.6798791325986492,
|
| 1169 |
+
"grad_norm": 1.4760479927062988,
|
| 1170 |
+
"learning_rate": 6.815431164901665e-06,
|
| 1171 |
+
"loss": 0.022126734256744385,
|
| 1172 |
+
"step": 3825
|
| 1173 |
+
},
|
| 1174 |
+
{
|
| 1175 |
+
"epoch": 0.6843227870600782,
|
| 1176 |
+
"grad_norm": 0.5096405744552612,
|
| 1177 |
+
"learning_rate": 6.7208774583963696e-06,
|
| 1178 |
+
"loss": 0.023763720989227296,
|
| 1179 |
+
"step": 3850
|
| 1180 |
+
},
|
| 1181 |
+
{
|
| 1182 |
+
"epoch": 0.6887664415215072,
|
| 1183 |
+
"grad_norm": 0.7516443133354187,
|
| 1184 |
+
"learning_rate": 6.626323751891075e-06,
|
| 1185 |
+
"loss": 0.023717043399810792,
|
| 1186 |
+
"step": 3875
|
| 1187 |
+
},
|
| 1188 |
+
{
|
| 1189 |
+
"epoch": 0.6932100959829364,
|
| 1190 |
+
"grad_norm": 0.8385019898414612,
|
| 1191 |
+
"learning_rate": 6.53177004538578e-06,
|
| 1192 |
+
"loss": 0.021878042221069337,
|
| 1193 |
+
"step": 3900
|
| 1194 |
+
},
|
| 1195 |
+
{
|
| 1196 |
+
"epoch": 0.6976537504443654,
|
| 1197 |
+
"grad_norm": 1.1693350076675415,
|
| 1198 |
+
"learning_rate": 6.4372163388804845e-06,
|
| 1199 |
+
"loss": 0.014371514320373535,
|
| 1200 |
+
"step": 3925
|
| 1201 |
+
},
|
| 1202 |
+
{
|
| 1203 |
+
"epoch": 0.7020974049057945,
|
| 1204 |
+
"grad_norm": 1.4496546983718872,
|
| 1205 |
+
"learning_rate": 6.342662632375189e-06,
|
| 1206 |
+
"loss": 0.021327991485595704,
|
| 1207 |
+
"step": 3950
|
| 1208 |
+
},
|
| 1209 |
+
{
|
| 1210 |
+
"epoch": 0.7065410593672236,
|
| 1211 |
+
"grad_norm": 1.0142734050750732,
|
| 1212 |
+
"learning_rate": 6.248108925869895e-06,
|
| 1213 |
+
"loss": 0.023486480712890626,
|
| 1214 |
+
"step": 3975
|
| 1215 |
+
},
|
| 1216 |
+
{
|
| 1217 |
+
"epoch": 0.7109847138286527,
|
| 1218 |
+
"grad_norm": 0.4203350245952606,
|
| 1219 |
+
"learning_rate": 6.1535552193645995e-06,
|
| 1220 |
+
"loss": 0.023264715671539305,
|
| 1221 |
+
"step": 4000
|
| 1222 |
+
},
|
| 1223 |
+
{
|
| 1224 |
+
"epoch": 0.7109847138286527,
|
| 1225 |
+
"eval_accuracy": 0.9335,
|
| 1226 |
+
"eval_auroc": 0.9970688944802013,
|
| 1227 |
+
"eval_f1": 0.9379374708352777,
|
| 1228 |
+
"eval_loss": 0.02452407218515873,
|
| 1229 |
+
"eval_runtime": 40.1446,
|
| 1230 |
+
"eval_samples_per_second": 49.82,
|
| 1231 |
+
"eval_steps_per_second": 1.569,
|
| 1232 |
+
"eval_tpr_at_fpr1": 0.9264413518886679,
|
| 1233 |
+
"eval_tpr_at_fpr5": 0.9850894632206759,
|
| 1234 |
+
"step": 4000
|
| 1235 |
+
},
|
| 1236 |
+
{
|
| 1237 |
+
"epoch": 0.7154283682900817,
|
| 1238 |
+
"grad_norm": 0.6930143237113953,
|
| 1239 |
+
"learning_rate": 6.059001512859305e-06,
|
| 1240 |
+
"loss": 0.01856299042701721,
|
| 1241 |
+
"step": 4025
|
| 1242 |
+
},
|
| 1243 |
+
{
|
| 1244 |
+
"epoch": 0.7198720227515109,
|
| 1245 |
+
"grad_norm": 1.0674962997436523,
|
| 1246 |
+
"learning_rate": 5.96444780635401e-06,
|
| 1247 |
+
"loss": 0.023626606464385986,
|
| 1248 |
+
"step": 4050
|
| 1249 |
+
},
|
| 1250 |
+
{
|
| 1251 |
+
"epoch": 0.7243156772129399,
|
| 1252 |
+
"grad_norm": 0.6356366276741028,
|
| 1253 |
+
"learning_rate": 5.8698940998487145e-06,
|
| 1254 |
+
"loss": 0.023326983451843263,
|
| 1255 |
+
"step": 4075
|
| 1256 |
+
},
|
| 1257 |
+
{
|
| 1258 |
+
"epoch": 0.728759331674369,
|
| 1259 |
+
"grad_norm": 0.8227376937866211,
|
| 1260 |
+
"learning_rate": 5.775340393343419e-06,
|
| 1261 |
+
"loss": 0.02331566095352173,
|
| 1262 |
+
"step": 4100
|
| 1263 |
+
},
|
| 1264 |
+
{
|
| 1265 |
+
"epoch": 0.733202986135798,
|
| 1266 |
+
"grad_norm": 2.189657211303711,
|
| 1267 |
+
"learning_rate": 5.680786686838125e-06,
|
| 1268 |
+
"loss": 0.01987994074821472,
|
| 1269 |
+
"step": 4125
|
| 1270 |
+
},
|
| 1271 |
+
{
|
| 1272 |
+
"epoch": 0.7376466405972272,
|
| 1273 |
+
"grad_norm": 0.46455055475234985,
|
| 1274 |
+
"learning_rate": 5.5862329803328295e-06,
|
| 1275 |
+
"loss": 0.01780161142349243,
|
| 1276 |
+
"step": 4150
|
| 1277 |
+
},
|
| 1278 |
+
{
|
| 1279 |
+
"epoch": 0.7420902950586562,
|
| 1280 |
+
"grad_norm": 0.7525627017021179,
|
| 1281 |
+
"learning_rate": 5.491679273827535e-06,
|
| 1282 |
+
"loss": 0.02872683048248291,
|
| 1283 |
+
"step": 4175
|
| 1284 |
+
},
|
| 1285 |
+
{
|
| 1286 |
+
"epoch": 0.7465339495200853,
|
| 1287 |
+
"grad_norm": 0.9939025640487671,
|
| 1288 |
+
"learning_rate": 5.39712556732224e-06,
|
| 1289 |
+
"loss": 0.021651785373687744,
|
| 1290 |
+
"step": 4200
|
| 1291 |
+
},
|
| 1292 |
+
{
|
| 1293 |
+
"epoch": 0.7509776039815144,
|
| 1294 |
+
"grad_norm": 0.5748035907745361,
|
| 1295 |
+
"learning_rate": 5.3025718608169445e-06,
|
| 1296 |
+
"loss": 0.01857919096946716,
|
| 1297 |
+
"step": 4225
|
| 1298 |
+
},
|
| 1299 |
+
{
|
| 1300 |
+
"epoch": 0.7554212584429435,
|
| 1301 |
+
"grad_norm": 1.1377756595611572,
|
| 1302 |
+
"learning_rate": 5.208018154311649e-06,
|
| 1303 |
+
"loss": 0.021290059089660644,
|
| 1304 |
+
"step": 4250
|
| 1305 |
+
},
|
| 1306 |
+
{
|
| 1307 |
+
"epoch": 0.7598649129043725,
|
| 1308 |
+
"grad_norm": 1.592410683631897,
|
| 1309 |
+
"learning_rate": 5.113464447806355e-06,
|
| 1310 |
+
"loss": 0.01949896812438965,
|
| 1311 |
+
"step": 4275
|
| 1312 |
+
},
|
| 1313 |
+
{
|
| 1314 |
+
"epoch": 0.7643085673658017,
|
| 1315 |
+
"grad_norm": 1.3217352628707886,
|
| 1316 |
+
"learning_rate": 5.0189107413010595e-06,
|
| 1317 |
+
"loss": 0.024791300296783447,
|
| 1318 |
+
"step": 4300
|
| 1319 |
+
},
|
| 1320 |
+
{
|
| 1321 |
+
"epoch": 0.7687522218272307,
|
| 1322 |
+
"grad_norm": 0.3922988474369049,
|
| 1323 |
+
"learning_rate": 4.924357034795764e-06,
|
| 1324 |
+
"loss": 0.021808433532714843,
|
| 1325 |
+
"step": 4325
|
| 1326 |
+
},
|
| 1327 |
+
{
|
| 1328 |
+
"epoch": 0.7731958762886598,
|
| 1329 |
+
"grad_norm": 0.45381656289100647,
|
| 1330 |
+
"learning_rate": 4.82980332829047e-06,
|
| 1331 |
+
"loss": 0.020455398559570313,
|
| 1332 |
+
"step": 4350
|
| 1333 |
+
},
|
| 1334 |
+
{
|
| 1335 |
+
"epoch": 0.7776395307500888,
|
| 1336 |
+
"grad_norm": 0.8540909886360168,
|
| 1337 |
+
"learning_rate": 4.7352496217851745e-06,
|
| 1338 |
+
"loss": 0.023225700855255126,
|
| 1339 |
+
"step": 4375
|
| 1340 |
+
},
|
| 1341 |
+
{
|
| 1342 |
+
"epoch": 0.782083185211518,
|
| 1343 |
+
"grad_norm": 2.9069783687591553,
|
| 1344 |
+
"learning_rate": 4.640695915279879e-06,
|
| 1345 |
+
"loss": 0.023292510509490966,
|
| 1346 |
+
"step": 4400
|
| 1347 |
+
},
|
| 1348 |
+
{
|
| 1349 |
+
"epoch": 0.786526839672947,
|
| 1350 |
+
"grad_norm": 0.9787670969963074,
|
| 1351 |
+
"learning_rate": 4.546142208774585e-06,
|
| 1352 |
+
"loss": 0.024890389442443848,
|
| 1353 |
+
"step": 4425
|
| 1354 |
+
},
|
| 1355 |
+
{
|
| 1356 |
+
"epoch": 0.7909704941343761,
|
| 1357 |
+
"grad_norm": 1.0303661823272705,
|
| 1358 |
+
"learning_rate": 4.4515885022692894e-06,
|
| 1359 |
+
"loss": 0.023072149753570557,
|
| 1360 |
+
"step": 4450
|
| 1361 |
+
},
|
| 1362 |
+
{
|
| 1363 |
+
"epoch": 0.7954141485958052,
|
| 1364 |
+
"grad_norm": 2.1931862831115723,
|
| 1365 |
+
"learning_rate": 4.357034795763994e-06,
|
| 1366 |
+
"loss": 0.032431015968322756,
|
| 1367 |
+
"step": 4475
|
| 1368 |
+
},
|
| 1369 |
+
{
|
| 1370 |
+
"epoch": 0.7998578030572343,
|
| 1371 |
+
"grad_norm": 0.7739485502243042,
|
| 1372 |
+
"learning_rate": 4.2624810892587e-06,
|
| 1373 |
+
"loss": 0.020095715522766112,
|
| 1374 |
+
"step": 4500
|
| 1375 |
+
},
|
| 1376 |
+
{
|
| 1377 |
+
"epoch": 0.7998578030572343,
|
| 1378 |
+
"eval_accuracy": 0.9275,
|
| 1379 |
+
"eval_auroc": 0.9970798948762156,
|
| 1380 |
+
"eval_f1": 0.9327146171693736,
|
| 1381 |
+
"eval_loss": 0.024173183366656303,
|
| 1382 |
+
"eval_runtime": 39.1522,
|
| 1383 |
+
"eval_samples_per_second": 51.083,
|
| 1384 |
+
"eval_steps_per_second": 1.609,
|
| 1385 |
+
"eval_tpr_at_fpr1": 0.9224652087475149,
|
| 1386 |
+
"eval_tpr_at_fpr5": 0.9900596421471173,
|
| 1387 |
+
"step": 4500
|
| 1388 |
+
},
|
| 1389 |
+
{
|
| 1390 |
+
"epoch": 0.8043014575186633,
|
| 1391 |
+
"grad_norm": 1.5598735809326172,
|
| 1392 |
+
"learning_rate": 4.167927382753404e-06,
|
| 1393 |
+
"loss": 0.024987099170684816,
|
| 1394 |
+
"step": 4525
|
| 1395 |
+
},
|
| 1396 |
+
{
|
| 1397 |
+
"epoch": 0.8087451119800925,
|
| 1398 |
+
"grad_norm": 1.1426900625228882,
|
| 1399 |
+
"learning_rate": 4.073373676248109e-06,
|
| 1400 |
+
"loss": 0.01934351325035095,
|
| 1401 |
+
"step": 4550
|
| 1402 |
+
},
|
| 1403 |
+
{
|
| 1404 |
+
"epoch": 0.8131887664415215,
|
| 1405 |
+
"grad_norm": 0.3795163333415985,
|
| 1406 |
+
"learning_rate": 3.978819969742814e-06,
|
| 1407 |
+
"loss": 0.020940425395965575,
|
| 1408 |
+
"step": 4575
|
| 1409 |
+
},
|
| 1410 |
+
{
|
| 1411 |
+
"epoch": 0.8176324209029506,
|
| 1412 |
+
"grad_norm": 1.1596218347549438,
|
| 1413 |
+
"learning_rate": 3.884266263237519e-06,
|
| 1414 |
+
"loss": 0.027658913135528564,
|
| 1415 |
+
"step": 4600
|
| 1416 |
+
},
|
| 1417 |
+
{
|
| 1418 |
+
"epoch": 0.8220760753643797,
|
| 1419 |
+
"grad_norm": 1.05118989944458,
|
| 1420 |
+
"learning_rate": 3.789712556732224e-06,
|
| 1421 |
+
"loss": 0.016726157665252685,
|
| 1422 |
+
"step": 4625
|
| 1423 |
+
},
|
| 1424 |
+
{
|
| 1425 |
+
"epoch": 0.8265197298258088,
|
| 1426 |
+
"grad_norm": 0.994926393032074,
|
| 1427 |
+
"learning_rate": 3.6951588502269293e-06,
|
| 1428 |
+
"loss": 0.013396300077438354,
|
| 1429 |
+
"step": 4650
|
| 1430 |
+
},
|
| 1431 |
+
{
|
| 1432 |
+
"epoch": 0.8309633842872378,
|
| 1433 |
+
"grad_norm": 2.416964054107666,
|
| 1434 |
+
"learning_rate": 3.6006051437216344e-06,
|
| 1435 |
+
"loss": 0.02318723201751709,
|
| 1436 |
+
"step": 4675
|
| 1437 |
+
},
|
| 1438 |
+
{
|
| 1439 |
+
"epoch": 0.835407038748667,
|
| 1440 |
+
"grad_norm": 2.359633445739746,
|
| 1441 |
+
"learning_rate": 3.506051437216339e-06,
|
| 1442 |
+
"loss": 0.02345597982406616,
|
| 1443 |
+
"step": 4700
|
| 1444 |
+
},
|
| 1445 |
+
{
|
| 1446 |
+
"epoch": 0.839850693210096,
|
| 1447 |
+
"grad_norm": 1.0586191415786743,
|
| 1448 |
+
"learning_rate": 3.4114977307110442e-06,
|
| 1449 |
+
"loss": 0.021095492839813233,
|
| 1450 |
+
"step": 4725
|
| 1451 |
+
},
|
| 1452 |
+
{
|
| 1453 |
+
"epoch": 0.8442943476715251,
|
| 1454 |
+
"grad_norm": 1.2005938291549683,
|
| 1455 |
+
"learning_rate": 3.3169440242057494e-06,
|
| 1456 |
+
"loss": 0.023975539207458495,
|
| 1457 |
+
"step": 4750
|
| 1458 |
+
},
|
| 1459 |
+
{
|
| 1460 |
+
"epoch": 0.8487380021329541,
|
| 1461 |
+
"grad_norm": 0.22911959886550903,
|
| 1462 |
+
"learning_rate": 3.222390317700454e-06,
|
| 1463 |
+
"loss": 0.019334245920181275,
|
| 1464 |
+
"step": 4775
|
| 1465 |
+
},
|
| 1466 |
+
{
|
| 1467 |
+
"epoch": 0.8531816565943833,
|
| 1468 |
+
"grad_norm": 1.3965319395065308,
|
| 1469 |
+
"learning_rate": 3.1278366111951592e-06,
|
| 1470 |
+
"loss": 0.027639262676239014,
|
| 1471 |
+
"step": 4800
|
| 1472 |
+
},
|
| 1473 |
+
{
|
| 1474 |
+
"epoch": 0.8576253110558123,
|
| 1475 |
+
"grad_norm": 0.15118920803070068,
|
| 1476 |
+
"learning_rate": 3.0332829046898644e-06,
|
| 1477 |
+
"loss": 0.020000927448272705,
|
| 1478 |
+
"step": 4825
|
| 1479 |
+
},
|
| 1480 |
+
{
|
| 1481 |
+
"epoch": 0.8620689655172413,
|
| 1482 |
+
"grad_norm": 1.7333295345306396,
|
| 1483 |
+
"learning_rate": 2.938729198184569e-06,
|
| 1484 |
+
"loss": 0.02153873920440674,
|
| 1485 |
+
"step": 4850
|
| 1486 |
+
},
|
| 1487 |
+
{
|
| 1488 |
+
"epoch": 0.8665126199786705,
|
| 1489 |
+
"grad_norm": 0.2823106348514557,
|
| 1490 |
+
"learning_rate": 2.844175491679274e-06,
|
| 1491 |
+
"loss": 0.014718363285064697,
|
| 1492 |
+
"step": 4875
|
| 1493 |
+
},
|
| 1494 |
+
{
|
| 1495 |
+
"epoch": 0.8709562744400995,
|
| 1496 |
+
"grad_norm": 0.735140323638916,
|
| 1497 |
+
"learning_rate": 2.7496217851739793e-06,
|
| 1498 |
+
"loss": 0.01869586229324341,
|
| 1499 |
+
"step": 4900
|
| 1500 |
+
},
|
| 1501 |
+
{
|
| 1502 |
+
"epoch": 0.8753999289015286,
|
| 1503 |
+
"grad_norm": 0.4756013751029968,
|
| 1504 |
+
"learning_rate": 2.655068078668684e-06,
|
| 1505 |
+
"loss": 0.01766459345817566,
|
| 1506 |
+
"step": 4925
|
| 1507 |
+
},
|
| 1508 |
+
{
|
| 1509 |
+
"epoch": 0.8798435833629576,
|
| 1510 |
+
"grad_norm": 1.9793126583099365,
|
| 1511 |
+
"learning_rate": 2.560514372163389e-06,
|
| 1512 |
+
"loss": 0.01844774007797241,
|
| 1513 |
+
"step": 4950
|
| 1514 |
+
},
|
| 1515 |
+
{
|
| 1516 |
+
"epoch": 0.8842872378243868,
|
| 1517 |
+
"grad_norm": 0.3258880376815796,
|
| 1518 |
+
"learning_rate": 2.465960665658094e-06,
|
| 1519 |
+
"loss": 0.01422677755355835,
|
| 1520 |
+
"step": 4975
|
| 1521 |
+
},
|
| 1522 |
+
{
|
| 1523 |
+
"epoch": 0.8887308922858158,
|
| 1524 |
+
"grad_norm": 0.9487712383270264,
|
| 1525 |
+
"learning_rate": 2.371406959152799e-06,
|
| 1526 |
+
"loss": 0.016807562112808226,
|
| 1527 |
+
"step": 5000
|
| 1528 |
+
},
|
| 1529 |
+
{
|
| 1530 |
+
"epoch": 0.8887308922858158,
|
| 1531 |
+
"eval_accuracy": 0.9295,
|
| 1532 |
+
"eval_auroc": 0.9969978919241093,
|
| 1533 |
+
"eval_f1": 0.9344490934449095,
|
| 1534 |
+
"eval_loss": 0.024602515622973442,
|
| 1535 |
+
"eval_runtime": 39.8771,
|
| 1536 |
+
"eval_samples_per_second": 50.154,
|
| 1537 |
+
"eval_steps_per_second": 1.58,
|
| 1538 |
+
"eval_tpr_at_fpr1": 0.889662027833002,
|
| 1539 |
+
"eval_tpr_at_fpr5": 0.9910536779324056,
|
| 1540 |
+
"step": 5000
|
| 1541 |
+
},
|
| 1542 |
+
{
|
| 1543 |
+
"epoch": 0.893174546747245,
|
| 1544 |
+
"grad_norm": 1.8331549167633057,
|
| 1545 |
+
"learning_rate": 2.276853252647504e-06,
|
| 1546 |
+
"loss": 0.019318313598632814,
|
| 1547 |
+
"step": 5025
|
| 1548 |
+
},
|
| 1549 |
+
{
|
| 1550 |
+
"epoch": 0.897618201208674,
|
| 1551 |
+
"grad_norm": 0.5335781574249268,
|
| 1552 |
+
"learning_rate": 2.182299546142209e-06,
|
| 1553 |
+
"loss": 0.016727542877197264,
|
| 1554 |
+
"step": 5050
|
| 1555 |
+
},
|
| 1556 |
+
{
|
| 1557 |
+
"epoch": 0.9020618556701031,
|
| 1558 |
+
"grad_norm": 2.0710813999176025,
|
| 1559 |
+
"learning_rate": 2.087745839636914e-06,
|
| 1560 |
+
"loss": 0.024016971588134765,
|
| 1561 |
+
"step": 5075
|
| 1562 |
+
},
|
| 1563 |
+
{
|
| 1564 |
+
"epoch": 0.9065055101315321,
|
| 1565 |
+
"grad_norm": 0.5005258321762085,
|
| 1566 |
+
"learning_rate": 1.993192133131619e-06,
|
| 1567 |
+
"loss": 0.023308300971984865,
|
| 1568 |
+
"step": 5100
|
| 1569 |
+
},
|
| 1570 |
+
{
|
| 1571 |
+
"epoch": 0.9109491645929613,
|
| 1572 |
+
"grad_norm": 0.8444198369979858,
|
| 1573 |
+
"learning_rate": 1.8986384266263239e-06,
|
| 1574 |
+
"loss": 0.013868091106414794,
|
| 1575 |
+
"step": 5125
|
| 1576 |
+
},
|
| 1577 |
+
{
|
| 1578 |
+
"epoch": 0.9153928190543903,
|
| 1579 |
+
"grad_norm": 1.0288333892822266,
|
| 1580 |
+
"learning_rate": 1.8040847201210288e-06,
|
| 1581 |
+
"loss": 0.02429831266403198,
|
| 1582 |
+
"step": 5150
|
| 1583 |
+
},
|
| 1584 |
+
{
|
| 1585 |
+
"epoch": 0.9198364735158194,
|
| 1586 |
+
"grad_norm": 0.5995722413063049,
|
| 1587 |
+
"learning_rate": 1.709531013615734e-06,
|
| 1588 |
+
"loss": 0.020702524185180662,
|
| 1589 |
+
"step": 5175
|
| 1590 |
+
},
|
| 1591 |
+
{
|
| 1592 |
+
"epoch": 0.9242801279772485,
|
| 1593 |
+
"grad_norm": 1.5560880899429321,
|
| 1594 |
+
"learning_rate": 1.6149773071104389e-06,
|
| 1595 |
+
"loss": 0.014829163551330566,
|
| 1596 |
+
"step": 5200
|
| 1597 |
+
},
|
| 1598 |
+
{
|
| 1599 |
+
"epoch": 0.9287237824386776,
|
| 1600 |
+
"grad_norm": 1.271360993385315,
|
| 1601 |
+
"learning_rate": 1.5204236006051438e-06,
|
| 1602 |
+
"loss": 0.020302300453186036,
|
| 1603 |
+
"step": 5225
|
| 1604 |
+
},
|
| 1605 |
+
{
|
| 1606 |
+
"epoch": 0.9331674369001066,
|
| 1607 |
+
"grad_norm": 2.036619186401367,
|
| 1608 |
+
"learning_rate": 1.425869894099849e-06,
|
| 1609 |
+
"loss": 0.023549365997314452,
|
| 1610 |
+
"step": 5250
|
| 1611 |
+
},
|
| 1612 |
+
{
|
| 1613 |
+
"epoch": 0.9376110913615358,
|
| 1614 |
+
"grad_norm": 1.7285027503967285,
|
| 1615 |
+
"learning_rate": 1.3313161875945538e-06,
|
| 1616 |
+
"loss": 0.02568220853805542,
|
| 1617 |
+
"step": 5275
|
| 1618 |
+
},
|
| 1619 |
+
{
|
| 1620 |
+
"epoch": 0.9420547458229648,
|
| 1621 |
+
"grad_norm": 1.4646673202514648,
|
| 1622 |
+
"learning_rate": 1.2367624810892588e-06,
|
| 1623 |
+
"loss": 0.030799252986907957,
|
| 1624 |
+
"step": 5300
|
| 1625 |
+
},
|
| 1626 |
+
{
|
| 1627 |
+
"epoch": 0.9464984002843939,
|
| 1628 |
+
"grad_norm": 0.6867812871932983,
|
| 1629 |
+
"learning_rate": 1.142208774583964e-06,
|
| 1630 |
+
"loss": 0.018717833757400514,
|
| 1631 |
+
"step": 5325
|
| 1632 |
+
},
|
| 1633 |
+
{
|
| 1634 |
+
"epoch": 0.9509420547458229,
|
| 1635 |
+
"grad_norm": 0.2627001702785492,
|
| 1636 |
+
"learning_rate": 1.0476550680786688e-06,
|
| 1637 |
+
"loss": 0.01653684616088867,
|
| 1638 |
+
"step": 5350
|
| 1639 |
+
},
|
| 1640 |
+
{
|
| 1641 |
+
"epoch": 0.9553857092072521,
|
| 1642 |
+
"grad_norm": 0.30518868565559387,
|
| 1643 |
+
"learning_rate": 9.531013615733737e-07,
|
| 1644 |
+
"loss": 0.022223813533782957,
|
| 1645 |
+
"step": 5375
|
| 1646 |
+
},
|
| 1647 |
+
{
|
| 1648 |
+
"epoch": 0.9598293636686811,
|
| 1649 |
+
"grad_norm": 0.5545350909233093,
|
| 1650 |
+
"learning_rate": 8.585476550680788e-07,
|
| 1651 |
+
"loss": 0.018215081691741943,
|
| 1652 |
+
"step": 5400
|
| 1653 |
+
},
|
| 1654 |
+
{
|
| 1655 |
+
"epoch": 0.9642730181301102,
|
| 1656 |
+
"grad_norm": 0.304283082485199,
|
| 1657 |
+
"learning_rate": 7.639939485627837e-07,
|
| 1658 |
+
"loss": 0.019889332056045532,
|
| 1659 |
+
"step": 5425
|
| 1660 |
+
},
|
| 1661 |
+
{
|
| 1662 |
+
"epoch": 0.9687166725915393,
|
| 1663 |
+
"grad_norm": 1.052090048789978,
|
| 1664 |
+
"learning_rate": 6.694402420574887e-07,
|
| 1665 |
+
"loss": 0.017396693229675294,
|
| 1666 |
+
"step": 5450
|
| 1667 |
+
},
|
| 1668 |
+
{
|
| 1669 |
+
"epoch": 0.9731603270529684,
|
| 1670 |
+
"grad_norm": 0.1891939640045166,
|
| 1671 |
+
"learning_rate": 5.748865355521937e-07,
|
| 1672 |
+
"loss": 0.019253385066986085,
|
| 1673 |
+
"step": 5475
|
| 1674 |
+
},
|
| 1675 |
+
{
|
| 1676 |
+
"epoch": 0.9776039815143974,
|
| 1677 |
+
"grad_norm": 0.5522451996803284,
|
| 1678 |
+
"learning_rate": 4.803328290468987e-07,
|
| 1679 |
+
"loss": 0.017252475023269653,
|
| 1680 |
+
"step": 5500
|
| 1681 |
+
},
|
| 1682 |
+
{
|
| 1683 |
+
"epoch": 0.9776039815143974,
|
| 1684 |
+
"eval_accuracy": 0.93,
|
| 1685 |
+
"eval_auroc": 0.9973359040925472,
|
| 1686 |
+
"eval_f1": 0.9349442379182157,
|
| 1687 |
+
"eval_loss": 0.024247920140624046,
|
| 1688 |
+
"eval_runtime": 38.3653,
|
| 1689 |
+
"eval_samples_per_second": 52.13,
|
| 1690 |
+
"eval_steps_per_second": 1.642,
|
| 1691 |
+
"eval_tpr_at_fpr1": 0.9055666003976143,
|
| 1692 |
+
"eval_tpr_at_fpr5": 0.9880715705765407,
|
| 1693 |
+
"step": 5500
|
| 1694 |
+
}
|
| 1695 |
+
],
|
| 1696 |
+
"logging_steps": 25,
|
| 1697 |
+
"max_steps": 5626,
|
| 1698 |
+
"num_input_tokens_seen": 0,
|
| 1699 |
+
"num_train_epochs": 1,
|
| 1700 |
+
"save_steps": 500,
|
| 1701 |
+
"stateful_callbacks": {
|
| 1702 |
+
"EarlyStoppingCallback": {
|
| 1703 |
+
"args": {
|
| 1704 |
+
"early_stopping_patience": 3,
|
| 1705 |
+
"early_stopping_threshold": 0.0
|
| 1706 |
+
},
|
| 1707 |
+
"attributes": {
|
| 1708 |
+
"early_stopping_patience_counter": 0
|
| 1709 |
+
}
|
| 1710 |
+
},
|
| 1711 |
+
"TrainerControl": {
|
| 1712 |
+
"args": {
|
| 1713 |
+
"should_epoch_stop": false,
|
| 1714 |
+
"should_evaluate": false,
|
| 1715 |
+
"should_log": false,
|
| 1716 |
+
"should_save": true,
|
| 1717 |
+
"should_training_stop": false
|
| 1718 |
+
},
|
| 1719 |
+
"attributes": {}
|
| 1720 |
+
}
|
| 1721 |
+
},
|
| 1722 |
+
"total_flos": 0.0,
|
| 1723 |
+
"train_batch_size": 32,
|
| 1724 |
+
"trial_name": null,
|
| 1725 |
+
"trial_params": null
|
| 1726 |
+
}
|
checkpoint-5500/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b726f050f5029e1ef25800ffb43c1e5bcf5df8fde670427401e6bad8b3522c9c
|
| 3 |
+
size 5329
|
checkpoint-5626/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8865a48d69b743be81bf98afb9d729976c955f40269b3b87be5ebeeead6b1d9b
|
| 3 |
+
size 736795940
|
checkpoint-5626/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2aafb3d4e70e97e271bcfaafa4b2a63a7a706d7fa375e595ef3c1028febc5937
|
| 3 |
+
size 1473711115
|
checkpoint-5626/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b1c70b20302e039ff922ef92da23103cbd68279d464265f819dc67cd09814988
|
| 3 |
+
size 14391
|
checkpoint-5626/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8d3c5c2fe67c6e02d1952ca318cead603068a46c59b4564bb99394113f7a5048
|
| 3 |
+
size 1529
|
checkpoint-5626/trainer_state.json
ADDED
|
@@ -0,0 +1,1774 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": 5626,
|
| 3 |
+
"best_metric": 0.9973979063246277,
|
| 4 |
+
"best_model_checkpoint": "/Users/anudit/Documents/GitHub/slopdetector/checkpoints/deberta-featattn-20260623-225422/checkpoint-5626",
|
| 5 |
+
"epoch": 1.0,
|
| 6 |
+
"eval_steps": 500,
|
| 7 |
+
"global_step": 5626,
|
| 8 |
+
"is_hyper_param_search": false,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"epoch": 0.004443654461429079,
|
| 14 |
+
"grad_norm": 3.013444662094116,
|
| 15 |
+
"learning_rate": 1.4201183431952664e-06,
|
| 16 |
+
"loss": 0.5004017639160157,
|
| 17 |
+
"step": 25
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"epoch": 0.008887308922858158,
|
| 21 |
+
"grad_norm": 2.297563076019287,
|
| 22 |
+
"learning_rate": 2.8994082840236688e-06,
|
| 23 |
+
"loss": 0.3888048934936523,
|
| 24 |
+
"step": 50
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"epoch": 0.013330963384287239,
|
| 28 |
+
"grad_norm": 1.134047508239746,
|
| 29 |
+
"learning_rate": 4.3786982248520715e-06,
|
| 30 |
+
"loss": 0.2801882553100586,
|
| 31 |
+
"step": 75
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
"epoch": 0.017774617845716316,
|
| 35 |
+
"grad_norm": 0.9834569096565247,
|
| 36 |
+
"learning_rate": 5.857988165680474e-06,
|
| 37 |
+
"loss": 0.24047454833984375,
|
| 38 |
+
"step": 100
|
| 39 |
+
},
|
| 40 |
+
{
|
| 41 |
+
"epoch": 0.022218272307145397,
|
| 42 |
+
"grad_norm": 1.117211103439331,
|
| 43 |
+
"learning_rate": 7.337278106508876e-06,
|
| 44 |
+
"loss": 0.2291146469116211,
|
| 45 |
+
"step": 125
|
| 46 |
+
},
|
| 47 |
+
{
|
| 48 |
+
"epoch": 0.026661926768574477,
|
| 49 |
+
"grad_norm": 0.8506172895431519,
|
| 50 |
+
"learning_rate": 8.816568047337279e-06,
|
| 51 |
+
"loss": 0.1967698097229004,
|
| 52 |
+
"step": 150
|
| 53 |
+
},
|
| 54 |
+
{
|
| 55 |
+
"epoch": 0.031105581230003555,
|
| 56 |
+
"grad_norm": 0.7021474242210388,
|
| 57 |
+
"learning_rate": 1.029585798816568e-05,
|
| 58 |
+
"loss": 0.17146373748779298,
|
| 59 |
+
"step": 175
|
| 60 |
+
},
|
| 61 |
+
{
|
| 62 |
+
"epoch": 0.03554923569143263,
|
| 63 |
+
"grad_norm": 1.2111107110977173,
|
| 64 |
+
"learning_rate": 1.1775147928994083e-05,
|
| 65 |
+
"loss": 0.13825268745422364,
|
| 66 |
+
"step": 200
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"epoch": 0.03999289015286171,
|
| 70 |
+
"grad_norm": 1.9403120279312134,
|
| 71 |
+
"learning_rate": 1.3254437869822488e-05,
|
| 72 |
+
"loss": 0.12618659019470216,
|
| 73 |
+
"step": 225
|
| 74 |
+
},
|
| 75 |
+
{
|
| 76 |
+
"epoch": 0.04443654461429079,
|
| 77 |
+
"grad_norm": 1.8931593894958496,
|
| 78 |
+
"learning_rate": 1.4733727810650888e-05,
|
| 79 |
+
"loss": 0.10122986793518067,
|
| 80 |
+
"step": 250
|
| 81 |
+
},
|
| 82 |
+
{
|
| 83 |
+
"epoch": 0.048880199075719874,
|
| 84 |
+
"grad_norm": 1.8619073629379272,
|
| 85 |
+
"learning_rate": 1.621301775147929e-05,
|
| 86 |
+
"loss": 0.07919074535369873,
|
| 87 |
+
"step": 275
|
| 88 |
+
},
|
| 89 |
+
{
|
| 90 |
+
"epoch": 0.053323853537148955,
|
| 91 |
+
"grad_norm": 1.9105793237686157,
|
| 92 |
+
"learning_rate": 1.7692307692307694e-05,
|
| 93 |
+
"loss": 0.08386680603027344,
|
| 94 |
+
"step": 300
|
| 95 |
+
},
|
| 96 |
+
{
|
| 97 |
+
"epoch": 0.05776750799857803,
|
| 98 |
+
"grad_norm": 2.0332772731781006,
|
| 99 |
+
"learning_rate": 1.9171597633136098e-05,
|
| 100 |
+
"loss": 0.08702397346496582,
|
| 101 |
+
"step": 325
|
| 102 |
+
},
|
| 103 |
+
{
|
| 104 |
+
"epoch": 0.06221116246000711,
|
| 105 |
+
"grad_norm": 0.9020377993583679,
|
| 106 |
+
"learning_rate": 1.995839636913767e-05,
|
| 107 |
+
"loss": 0.06874918460845947,
|
| 108 |
+
"step": 350
|
| 109 |
+
},
|
| 110 |
+
{
|
| 111 |
+
"epoch": 0.06665481692143618,
|
| 112 |
+
"grad_norm": 1.6216212511062622,
|
| 113 |
+
"learning_rate": 1.9863842662632376e-05,
|
| 114 |
+
"loss": 0.06805606842041016,
|
| 115 |
+
"step": 375
|
| 116 |
+
},
|
| 117 |
+
{
|
| 118 |
+
"epoch": 0.07109847138286526,
|
| 119 |
+
"grad_norm": 1.7693337202072144,
|
| 120 |
+
"learning_rate": 1.9769288956127082e-05,
|
| 121 |
+
"loss": 0.06352178573608398,
|
| 122 |
+
"step": 400
|
| 123 |
+
},
|
| 124 |
+
{
|
| 125 |
+
"epoch": 0.07554212584429434,
|
| 126 |
+
"grad_norm": 1.6724389791488647,
|
| 127 |
+
"learning_rate": 1.9674735249621784e-05,
|
| 128 |
+
"loss": 0.0673055648803711,
|
| 129 |
+
"step": 425
|
| 130 |
+
},
|
| 131 |
+
{
|
| 132 |
+
"epoch": 0.07998578030572343,
|
| 133 |
+
"grad_norm": 0.5278561115264893,
|
| 134 |
+
"learning_rate": 1.9580181543116493e-05,
|
| 135 |
+
"loss": 0.06466075897216797,
|
| 136 |
+
"step": 450
|
| 137 |
+
},
|
| 138 |
+
{
|
| 139 |
+
"epoch": 0.0844294347671525,
|
| 140 |
+
"grad_norm": 1.7042737007141113,
|
| 141 |
+
"learning_rate": 1.9485627836611195e-05,
|
| 142 |
+
"loss": 0.0630407428741455,
|
| 143 |
+
"step": 475
|
| 144 |
+
},
|
| 145 |
+
{
|
| 146 |
+
"epoch": 0.08887308922858159,
|
| 147 |
+
"grad_norm": 0.3513544797897339,
|
| 148 |
+
"learning_rate": 1.93910741301059e-05,
|
| 149 |
+
"loss": 0.062327189445495604,
|
| 150 |
+
"step": 500
|
| 151 |
+
},
|
| 152 |
+
{
|
| 153 |
+
"epoch": 0.08887308922858159,
|
| 154 |
+
"eval_accuracy": 0.848,
|
| 155 |
+
"eval_auroc": 0.9866655199587185,
|
| 156 |
+
"eval_f1": 0.8685121107266436,
|
| 157 |
+
"eval_loss": 0.054977674037218094,
|
| 158 |
+
"eval_runtime": 39.8938,
|
| 159 |
+
"eval_samples_per_second": 50.133,
|
| 160 |
+
"eval_steps_per_second": 1.579,
|
| 161 |
+
"eval_tpr_at_fpr1": 0.7455268389662028,
|
| 162 |
+
"eval_tpr_at_fpr5": 0.937375745526839,
|
| 163 |
+
"step": 500
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"epoch": 0.09331674369001067,
|
| 167 |
+
"grad_norm": 2.1725878715515137,
|
| 168 |
+
"learning_rate": 1.9296520423600606e-05,
|
| 169 |
+
"loss": 0.05081462860107422,
|
| 170 |
+
"step": 525
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"epoch": 0.09776039815143975,
|
| 174 |
+
"grad_norm": 2.587542772293091,
|
| 175 |
+
"learning_rate": 1.9201966717095312e-05,
|
| 176 |
+
"loss": 0.06098108291625977,
|
| 177 |
+
"step": 550
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"epoch": 0.10220405261286883,
|
| 181 |
+
"grad_norm": 1.1815265417099,
|
| 182 |
+
"learning_rate": 1.9107413010590018e-05,
|
| 183 |
+
"loss": 0.04866991996765137,
|
| 184 |
+
"step": 575
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"epoch": 0.10664770707429791,
|
| 188 |
+
"grad_norm": 1.140872597694397,
|
| 189 |
+
"learning_rate": 1.901285930408472e-05,
|
| 190 |
+
"loss": 0.06058640956878662,
|
| 191 |
+
"step": 600
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"epoch": 0.11109136153572698,
|
| 195 |
+
"grad_norm": 1.164772868156433,
|
| 196 |
+
"learning_rate": 1.891830559757943e-05,
|
| 197 |
+
"loss": 0.0485923957824707,
|
| 198 |
+
"step": 625
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"epoch": 0.11553501599715606,
|
| 202 |
+
"grad_norm": 2.076003074645996,
|
| 203 |
+
"learning_rate": 1.882375189107413e-05,
|
| 204 |
+
"loss": 0.05198529243469238,
|
| 205 |
+
"step": 650
|
| 206 |
+
},
|
| 207 |
+
{
|
| 208 |
+
"epoch": 0.11997867045858514,
|
| 209 |
+
"grad_norm": 2.8677966594696045,
|
| 210 |
+
"learning_rate": 1.8729198184568836e-05,
|
| 211 |
+
"loss": 0.05215679168701172,
|
| 212 |
+
"step": 675
|
| 213 |
+
},
|
| 214 |
+
{
|
| 215 |
+
"epoch": 0.12442232492001422,
|
| 216 |
+
"grad_norm": 1.243391752243042,
|
| 217 |
+
"learning_rate": 1.8634644478063542e-05,
|
| 218 |
+
"loss": 0.046974472999572754,
|
| 219 |
+
"step": 700
|
| 220 |
+
},
|
| 221 |
+
{
|
| 222 |
+
"epoch": 0.12886597938144329,
|
| 223 |
+
"grad_norm": 1.970794916152954,
|
| 224 |
+
"learning_rate": 1.8540090771558244e-05,
|
| 225 |
+
"loss": 0.051630439758300783,
|
| 226 |
+
"step": 725
|
| 227 |
+
},
|
| 228 |
+
{
|
| 229 |
+
"epoch": 0.13330963384287237,
|
| 230 |
+
"grad_norm": 1.031387448310852,
|
| 231 |
+
"learning_rate": 1.8445537065052953e-05,
|
| 232 |
+
"loss": 0.04577981948852539,
|
| 233 |
+
"step": 750
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"epoch": 0.13775328830430145,
|
| 237 |
+
"grad_norm": 1.4441957473754883,
|
| 238 |
+
"learning_rate": 1.8350983358547655e-05,
|
| 239 |
+
"loss": 0.05677220821380615,
|
| 240 |
+
"step": 775
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"epoch": 0.14219694276573053,
|
| 244 |
+
"grad_norm": 1.2302734851837158,
|
| 245 |
+
"learning_rate": 1.825642965204236e-05,
|
| 246 |
+
"loss": 0.043911681175231934,
|
| 247 |
+
"step": 800
|
| 248 |
+
},
|
| 249 |
+
{
|
| 250 |
+
"epoch": 0.1466405972271596,
|
| 251 |
+
"grad_norm": 0.9389927983283997,
|
| 252 |
+
"learning_rate": 1.8161875945537066e-05,
|
| 253 |
+
"loss": 0.04272346019744873,
|
| 254 |
+
"step": 825
|
| 255 |
+
},
|
| 256 |
+
{
|
| 257 |
+
"epoch": 0.1510842516885887,
|
| 258 |
+
"grad_norm": 1.342290997505188,
|
| 259 |
+
"learning_rate": 1.8067322239031772e-05,
|
| 260 |
+
"loss": 0.054392943382263186,
|
| 261 |
+
"step": 850
|
| 262 |
+
},
|
| 263 |
+
{
|
| 264 |
+
"epoch": 0.15552790615001777,
|
| 265 |
+
"grad_norm": 2.6409666538238525,
|
| 266 |
+
"learning_rate": 1.7972768532526477e-05,
|
| 267 |
+
"loss": 0.04197061061859131,
|
| 268 |
+
"step": 875
|
| 269 |
+
},
|
| 270 |
+
{
|
| 271 |
+
"epoch": 0.15997156061144685,
|
| 272 |
+
"grad_norm": 1.1038918495178223,
|
| 273 |
+
"learning_rate": 1.787821482602118e-05,
|
| 274 |
+
"loss": 0.03587212562561035,
|
| 275 |
+
"step": 900
|
| 276 |
+
},
|
| 277 |
+
{
|
| 278 |
+
"epoch": 0.16441521507287593,
|
| 279 |
+
"grad_norm": 1.414070725440979,
|
| 280 |
+
"learning_rate": 1.778366111951589e-05,
|
| 281 |
+
"loss": 0.04636185646057129,
|
| 282 |
+
"step": 925
|
| 283 |
+
},
|
| 284 |
+
{
|
| 285 |
+
"epoch": 0.168858869534305,
|
| 286 |
+
"grad_norm": 2.164773941040039,
|
| 287 |
+
"learning_rate": 1.768910741301059e-05,
|
| 288 |
+
"loss": 0.044623188972473145,
|
| 289 |
+
"step": 950
|
| 290 |
+
},
|
| 291 |
+
{
|
| 292 |
+
"epoch": 0.1733025239957341,
|
| 293 |
+
"grad_norm": 2.06410813331604,
|
| 294 |
+
"learning_rate": 1.7594553706505296e-05,
|
| 295 |
+
"loss": 0.038699045181274414,
|
| 296 |
+
"step": 975
|
| 297 |
+
},
|
| 298 |
+
{
|
| 299 |
+
"epoch": 0.17774617845716317,
|
| 300 |
+
"grad_norm": 1.53926420211792,
|
| 301 |
+
"learning_rate": 1.7500000000000002e-05,
|
| 302 |
+
"loss": 0.038100283145904544,
|
| 303 |
+
"step": 1000
|
| 304 |
+
},
|
| 305 |
+
{
|
| 306 |
+
"epoch": 0.17774617845716317,
|
| 307 |
+
"eval_accuracy": 0.89,
|
| 308 |
+
"eval_auroc": 0.9912356844846415,
|
| 309 |
+
"eval_f1": 0.9012567324955117,
|
| 310 |
+
"eval_loss": 0.042472898960113525,
|
| 311 |
+
"eval_runtime": 38.57,
|
| 312 |
+
"eval_samples_per_second": 51.854,
|
| 313 |
+
"eval_steps_per_second": 1.633,
|
| 314 |
+
"eval_tpr_at_fpr1": 0.8230616302186878,
|
| 315 |
+
"eval_tpr_at_fpr5": 0.952286282306163,
|
| 316 |
+
"step": 1000
|
| 317 |
+
},
|
| 318 |
+
{
|
| 319 |
+
"epoch": 0.18218983291859225,
|
| 320 |
+
"grad_norm": 0.9646220803260803,
|
| 321 |
+
"learning_rate": 1.7405446293494704e-05,
|
| 322 |
+
"loss": 0.03173836708068847,
|
| 323 |
+
"step": 1025
|
| 324 |
+
},
|
| 325 |
+
{
|
| 326 |
+
"epoch": 0.18663348738002133,
|
| 327 |
+
"grad_norm": 1.0528196096420288,
|
| 328 |
+
"learning_rate": 1.7310892586989413e-05,
|
| 329 |
+
"loss": 0.03988344669342041,
|
| 330 |
+
"step": 1050
|
| 331 |
+
},
|
| 332 |
+
{
|
| 333 |
+
"epoch": 0.19107714184145042,
|
| 334 |
+
"grad_norm": 1.5726221799850464,
|
| 335 |
+
"learning_rate": 1.7216338880484115e-05,
|
| 336 |
+
"loss": 0.044674863815307615,
|
| 337 |
+
"step": 1075
|
| 338 |
+
},
|
| 339 |
+
{
|
| 340 |
+
"epoch": 0.1955207963028795,
|
| 341 |
+
"grad_norm": 1.551660418510437,
|
| 342 |
+
"learning_rate": 1.712178517397882e-05,
|
| 343 |
+
"loss": 0.040711288452148435,
|
| 344 |
+
"step": 1100
|
| 345 |
+
},
|
| 346 |
+
{
|
| 347 |
+
"epoch": 0.19996445076430858,
|
| 348 |
+
"grad_norm": 1.090385913848877,
|
| 349 |
+
"learning_rate": 1.7027231467473526e-05,
|
| 350 |
+
"loss": 0.037872114181518556,
|
| 351 |
+
"step": 1125
|
| 352 |
+
},
|
| 353 |
+
{
|
| 354 |
+
"epoch": 0.20440810522573766,
|
| 355 |
+
"grad_norm": 1.1778202056884766,
|
| 356 |
+
"learning_rate": 1.6932677760968232e-05,
|
| 357 |
+
"loss": 0.039130420684814454,
|
| 358 |
+
"step": 1150
|
| 359 |
+
},
|
| 360 |
+
{
|
| 361 |
+
"epoch": 0.20885175968716674,
|
| 362 |
+
"grad_norm": 1.402064323425293,
|
| 363 |
+
"learning_rate": 1.6838124054462937e-05,
|
| 364 |
+
"loss": 0.03875999212265015,
|
| 365 |
+
"step": 1175
|
| 366 |
+
},
|
| 367 |
+
{
|
| 368 |
+
"epoch": 0.21329541414859582,
|
| 369 |
+
"grad_norm": 0.7979677319526672,
|
| 370 |
+
"learning_rate": 1.674357034795764e-05,
|
| 371 |
+
"loss": 0.033257806301116945,
|
| 372 |
+
"step": 1200
|
| 373 |
+
},
|
| 374 |
+
{
|
| 375 |
+
"epoch": 0.21773906861002487,
|
| 376 |
+
"grad_norm": 2.5630085468292236,
|
| 377 |
+
"learning_rate": 1.664901664145235e-05,
|
| 378 |
+
"loss": 0.03850275993347168,
|
| 379 |
+
"step": 1225
|
| 380 |
+
},
|
| 381 |
+
{
|
| 382 |
+
"epoch": 0.22218272307145395,
|
| 383 |
+
"grad_norm": 1.1255887746810913,
|
| 384 |
+
"learning_rate": 1.655446293494705e-05,
|
| 385 |
+
"loss": 0.035763952732086185,
|
| 386 |
+
"step": 1250
|
| 387 |
+
},
|
| 388 |
+
{
|
| 389 |
+
"epoch": 0.22662637753288303,
|
| 390 |
+
"grad_norm": 2.648975133895874,
|
| 391 |
+
"learning_rate": 1.6459909228441756e-05,
|
| 392 |
+
"loss": 0.04280531883239746,
|
| 393 |
+
"step": 1275
|
| 394 |
+
},
|
| 395 |
+
{
|
| 396 |
+
"epoch": 0.23107003199431211,
|
| 397 |
+
"grad_norm": 1.7409067153930664,
|
| 398 |
+
"learning_rate": 1.6365355521936462e-05,
|
| 399 |
+
"loss": 0.04235891819000244,
|
| 400 |
+
"step": 1300
|
| 401 |
+
},
|
| 402 |
+
{
|
| 403 |
+
"epoch": 0.2355136864557412,
|
| 404 |
+
"grad_norm": 1.5755038261413574,
|
| 405 |
+
"learning_rate": 1.6270801815431164e-05,
|
| 406 |
+
"loss": 0.04084760665893555,
|
| 407 |
+
"step": 1325
|
| 408 |
+
},
|
| 409 |
+
{
|
| 410 |
+
"epoch": 0.23995734091717028,
|
| 411 |
+
"grad_norm": 1.2442480325698853,
|
| 412 |
+
"learning_rate": 1.6176248108925873e-05,
|
| 413 |
+
"loss": 0.03942857027053833,
|
| 414 |
+
"step": 1350
|
| 415 |
+
},
|
| 416 |
+
{
|
| 417 |
+
"epoch": 0.24440099537859936,
|
| 418 |
+
"grad_norm": 0.7775816321372986,
|
| 419 |
+
"learning_rate": 1.6081694402420575e-05,
|
| 420 |
+
"loss": 0.039197320938110354,
|
| 421 |
+
"step": 1375
|
| 422 |
+
},
|
| 423 |
+
{
|
| 424 |
+
"epoch": 0.24884464984002844,
|
| 425 |
+
"grad_norm": 0.44854021072387695,
|
| 426 |
+
"learning_rate": 1.598714069591528e-05,
|
| 427 |
+
"loss": 0.030667483806610107,
|
| 428 |
+
"step": 1400
|
| 429 |
+
},
|
| 430 |
+
{
|
| 431 |
+
"epoch": 0.25328830430145755,
|
| 432 |
+
"grad_norm": 0.9631138443946838,
|
| 433 |
+
"learning_rate": 1.5892586989409986e-05,
|
| 434 |
+
"loss": 0.03460927009582519,
|
| 435 |
+
"step": 1425
|
| 436 |
+
},
|
| 437 |
+
{
|
| 438 |
+
"epoch": 0.25773195876288657,
|
| 439 |
+
"grad_norm": 0.8312052488327026,
|
| 440 |
+
"learning_rate": 1.5798033282904692e-05,
|
| 441 |
+
"loss": 0.03320029735565186,
|
| 442 |
+
"step": 1450
|
| 443 |
+
},
|
| 444 |
+
{
|
| 445 |
+
"epoch": 0.26217561322431565,
|
| 446 |
+
"grad_norm": 1.1160472631454468,
|
| 447 |
+
"learning_rate": 1.5703479576399397e-05,
|
| 448 |
+
"loss": 0.03198946952819824,
|
| 449 |
+
"step": 1475
|
| 450 |
+
},
|
| 451 |
+
{
|
| 452 |
+
"epoch": 0.26661926768574473,
|
| 453 |
+
"grad_norm": 1.6029430627822876,
|
| 454 |
+
"learning_rate": 1.56089258698941e-05,
|
| 455 |
+
"loss": 0.034000282287597654,
|
| 456 |
+
"step": 1500
|
| 457 |
+
},
|
| 458 |
+
{
|
| 459 |
+
"epoch": 0.26661926768574473,
|
| 460 |
+
"eval_accuracy": 0.8785,
|
| 461 |
+
"eval_auroc": 0.9909046725682125,
|
| 462 |
+
"eval_f1": 0.8921438082556591,
|
| 463 |
+
"eval_loss": 0.04817873612046242,
|
| 464 |
+
"eval_runtime": 44.1179,
|
| 465 |
+
"eval_samples_per_second": 45.333,
|
| 466 |
+
"eval_steps_per_second": 1.428,
|
| 467 |
+
"eval_tpr_at_fpr1": 0.8021868787276342,
|
| 468 |
+
"eval_tpr_at_fpr5": 0.9572564612326043,
|
| 469 |
+
"step": 1500
|
| 470 |
+
},
|
| 471 |
+
{
|
| 472 |
+
"epoch": 0.2710629221471738,
|
| 473 |
+
"grad_norm": 2.1138088703155518,
|
| 474 |
+
"learning_rate": 1.5514372163388805e-05,
|
| 475 |
+
"loss": 0.03080030918121338,
|
| 476 |
+
"step": 1525
|
| 477 |
+
},
|
| 478 |
+
{
|
| 479 |
+
"epoch": 0.2755065766086029,
|
| 480 |
+
"grad_norm": 2.206002950668335,
|
| 481 |
+
"learning_rate": 1.541981845688351e-05,
|
| 482 |
+
"loss": 0.04029146194458008,
|
| 483 |
+
"step": 1550
|
| 484 |
+
},
|
| 485 |
+
{
|
| 486 |
+
"epoch": 0.279950231070032,
|
| 487 |
+
"grad_norm": 1.8083148002624512,
|
| 488 |
+
"learning_rate": 1.5325264750378216e-05,
|
| 489 |
+
"loss": 0.028056590557098388,
|
| 490 |
+
"step": 1575
|
| 491 |
+
},
|
| 492 |
+
{
|
| 493 |
+
"epoch": 0.28439388553146105,
|
| 494 |
+
"grad_norm": 0.9714005589485168,
|
| 495 |
+
"learning_rate": 1.5230711043872922e-05,
|
| 496 |
+
"loss": 0.033678176403045657,
|
| 497 |
+
"step": 1600
|
| 498 |
+
},
|
| 499 |
+
{
|
| 500 |
+
"epoch": 0.28883753999289014,
|
| 501 |
+
"grad_norm": 0.29741403460502625,
|
| 502 |
+
"learning_rate": 1.5136157337367626e-05,
|
| 503 |
+
"loss": 0.03180084943771362,
|
| 504 |
+
"step": 1625
|
| 505 |
+
},
|
| 506 |
+
{
|
| 507 |
+
"epoch": 0.2932811944543192,
|
| 508 |
+
"grad_norm": 0.516327440738678,
|
| 509 |
+
"learning_rate": 1.5041603630862331e-05,
|
| 510 |
+
"loss": 0.03431586980819702,
|
| 511 |
+
"step": 1650
|
| 512 |
+
},
|
| 513 |
+
{
|
| 514 |
+
"epoch": 0.2977248489157483,
|
| 515 |
+
"grad_norm": 0.7245854735374451,
|
| 516 |
+
"learning_rate": 1.4947049924357035e-05,
|
| 517 |
+
"loss": 0.03058172941207886,
|
| 518 |
+
"step": 1675
|
| 519 |
+
},
|
| 520 |
+
{
|
| 521 |
+
"epoch": 0.3021685033771774,
|
| 522 |
+
"grad_norm": 2.4247725009918213,
|
| 523 |
+
"learning_rate": 1.4852496217851742e-05,
|
| 524 |
+
"loss": 0.04614161014556885,
|
| 525 |
+
"step": 1700
|
| 526 |
+
},
|
| 527 |
+
{
|
| 528 |
+
"epoch": 0.30661215783860646,
|
| 529 |
+
"grad_norm": 1.5023690462112427,
|
| 530 |
+
"learning_rate": 1.4757942511346446e-05,
|
| 531 |
+
"loss": 0.03227074861526489,
|
| 532 |
+
"step": 1725
|
| 533 |
+
},
|
| 534 |
+
{
|
| 535 |
+
"epoch": 0.31105581230003554,
|
| 536 |
+
"grad_norm": 0.5162255764007568,
|
| 537 |
+
"learning_rate": 1.466338880484115e-05,
|
| 538 |
+
"loss": 0.037947914600372314,
|
| 539 |
+
"step": 1750
|
| 540 |
+
},
|
| 541 |
+
{
|
| 542 |
+
"epoch": 0.3154994667614646,
|
| 543 |
+
"grad_norm": 0.8927256464958191,
|
| 544 |
+
"learning_rate": 1.4568835098335856e-05,
|
| 545 |
+
"loss": 0.04013613700866699,
|
| 546 |
+
"step": 1775
|
| 547 |
+
},
|
| 548 |
+
{
|
| 549 |
+
"epoch": 0.3199431212228937,
|
| 550 |
+
"grad_norm": 1.9027554988861084,
|
| 551 |
+
"learning_rate": 1.447428139183056e-05,
|
| 552 |
+
"loss": 0.04084087371826172,
|
| 553 |
+
"step": 1800
|
| 554 |
+
},
|
| 555 |
+
{
|
| 556 |
+
"epoch": 0.3243867756843228,
|
| 557 |
+
"grad_norm": 1.0974030494689941,
|
| 558 |
+
"learning_rate": 1.4379727685325267e-05,
|
| 559 |
+
"loss": 0.02415942192077637,
|
| 560 |
+
"step": 1825
|
| 561 |
+
},
|
| 562 |
+
{
|
| 563 |
+
"epoch": 0.32883043014575186,
|
| 564 |
+
"grad_norm": 1.2848249673843384,
|
| 565 |
+
"learning_rate": 1.428517397881997e-05,
|
| 566 |
+
"loss": 0.03221212863922119,
|
| 567 |
+
"step": 1850
|
| 568 |
+
},
|
| 569 |
+
{
|
| 570 |
+
"epoch": 0.33327408460718094,
|
| 571 |
+
"grad_norm": 0.8059474229812622,
|
| 572 |
+
"learning_rate": 1.4190620272314676e-05,
|
| 573 |
+
"loss": 0.037272207736968994,
|
| 574 |
+
"step": 1875
|
| 575 |
+
},
|
| 576 |
+
{
|
| 577 |
+
"epoch": 0.33771773906861,
|
| 578 |
+
"grad_norm": 1.0132513046264648,
|
| 579 |
+
"learning_rate": 1.4096066565809382e-05,
|
| 580 |
+
"loss": 0.029253509044647217,
|
| 581 |
+
"step": 1900
|
| 582 |
+
},
|
| 583 |
+
{
|
| 584 |
+
"epoch": 0.3421613935300391,
|
| 585 |
+
"grad_norm": 0.7545719742774963,
|
| 586 |
+
"learning_rate": 1.4001512859304086e-05,
|
| 587 |
+
"loss": 0.03997385501861572,
|
| 588 |
+
"step": 1925
|
| 589 |
+
},
|
| 590 |
+
{
|
| 591 |
+
"epoch": 0.3466050479914682,
|
| 592 |
+
"grad_norm": 0.37751272320747375,
|
| 593 |
+
"learning_rate": 1.3906959152798791e-05,
|
| 594 |
+
"loss": 0.027481729984283446,
|
| 595 |
+
"step": 1950
|
| 596 |
+
},
|
| 597 |
+
{
|
| 598 |
+
"epoch": 0.35104870245289727,
|
| 599 |
+
"grad_norm": 1.522934079170227,
|
| 600 |
+
"learning_rate": 1.3812405446293495e-05,
|
| 601 |
+
"loss": 0.028633484840393065,
|
| 602 |
+
"step": 1975
|
| 603 |
+
},
|
| 604 |
+
{
|
| 605 |
+
"epoch": 0.35549235691432635,
|
| 606 |
+
"grad_norm": 1.2354328632354736,
|
| 607 |
+
"learning_rate": 1.3717851739788202e-05,
|
| 608 |
+
"loss": 0.022525691986083986,
|
| 609 |
+
"step": 2000
|
| 610 |
+
},
|
| 611 |
+
{
|
| 612 |
+
"epoch": 0.35549235691432635,
|
| 613 |
+
"eval_accuracy": 0.936,
|
| 614 |
+
"eval_auroc": 0.9948458144493202,
|
| 615 |
+
"eval_f1": 0.9399624765478424,
|
| 616 |
+
"eval_loss": 0.030829520896077156,
|
| 617 |
+
"eval_runtime": 40.0972,
|
| 618 |
+
"eval_samples_per_second": 49.879,
|
| 619 |
+
"eval_steps_per_second": 1.571,
|
| 620 |
+
"eval_tpr_at_fpr1": 0.9125248508946322,
|
| 621 |
+
"eval_tpr_at_fpr5": 0.9781312127236581,
|
| 622 |
+
"step": 2000
|
| 623 |
+
},
|
| 624 |
+
{
|
| 625 |
+
"epoch": 0.3599360113757554,
|
| 626 |
+
"grad_norm": 1.0933098793029785,
|
| 627 |
+
"learning_rate": 1.3623298033282906e-05,
|
| 628 |
+
"loss": 0.03529852867126465,
|
| 629 |
+
"step": 2025
|
| 630 |
+
},
|
| 631 |
+
{
|
| 632 |
+
"epoch": 0.3643796658371845,
|
| 633 |
+
"grad_norm": 2.0476551055908203,
|
| 634 |
+
"learning_rate": 1.352874432677761e-05,
|
| 635 |
+
"loss": 0.03386972665786743,
|
| 636 |
+
"step": 2050
|
| 637 |
+
},
|
| 638 |
+
{
|
| 639 |
+
"epoch": 0.3688233202986136,
|
| 640 |
+
"grad_norm": 0.9298884868621826,
|
| 641 |
+
"learning_rate": 1.3434190620272315e-05,
|
| 642 |
+
"loss": 0.03486936330795288,
|
| 643 |
+
"step": 2075
|
| 644 |
+
},
|
| 645 |
+
{
|
| 646 |
+
"epoch": 0.37326697476004267,
|
| 647 |
+
"grad_norm": 0.7512989044189453,
|
| 648 |
+
"learning_rate": 1.333963691376702e-05,
|
| 649 |
+
"loss": 0.0267183780670166,
|
| 650 |
+
"step": 2100
|
| 651 |
+
},
|
| 652 |
+
{
|
| 653 |
+
"epoch": 0.37771062922147175,
|
| 654 |
+
"grad_norm": 1.4821196794509888,
|
| 655 |
+
"learning_rate": 1.3245083207261727e-05,
|
| 656 |
+
"loss": 0.021263403892517088,
|
| 657 |
+
"step": 2125
|
| 658 |
+
},
|
| 659 |
+
{
|
| 660 |
+
"epoch": 0.38215428368290083,
|
| 661 |
+
"grad_norm": 0.8745072484016418,
|
| 662 |
+
"learning_rate": 1.315052950075643e-05,
|
| 663 |
+
"loss": 0.0272180438041687,
|
| 664 |
+
"step": 2150
|
| 665 |
+
},
|
| 666 |
+
{
|
| 667 |
+
"epoch": 0.3865979381443299,
|
| 668 |
+
"grad_norm": 1.6741865873336792,
|
| 669 |
+
"learning_rate": 1.3055975794251136e-05,
|
| 670 |
+
"loss": 0.026980955600738526,
|
| 671 |
+
"step": 2175
|
| 672 |
+
},
|
| 673 |
+
{
|
| 674 |
+
"epoch": 0.391041592605759,
|
| 675 |
+
"grad_norm": 0.8200652599334717,
|
| 676 |
+
"learning_rate": 1.2961422087745842e-05,
|
| 677 |
+
"loss": 0.027142252922058106,
|
| 678 |
+
"step": 2200
|
| 679 |
+
},
|
| 680 |
+
{
|
| 681 |
+
"epoch": 0.3954852470671881,
|
| 682 |
+
"grad_norm": 1.5616494417190552,
|
| 683 |
+
"learning_rate": 1.2866868381240545e-05,
|
| 684 |
+
"loss": 0.030536642074584962,
|
| 685 |
+
"step": 2225
|
| 686 |
+
},
|
| 687 |
+
{
|
| 688 |
+
"epoch": 0.39992890152861715,
|
| 689 |
+
"grad_norm": 0.7505294680595398,
|
| 690 |
+
"learning_rate": 1.2772314674735251e-05,
|
| 691 |
+
"loss": 0.03007654905319214,
|
| 692 |
+
"step": 2250
|
| 693 |
+
},
|
| 694 |
+
{
|
| 695 |
+
"epoch": 0.40437255599004623,
|
| 696 |
+
"grad_norm": 0.3857294023036957,
|
| 697 |
+
"learning_rate": 1.2677760968229955e-05,
|
| 698 |
+
"loss": 0.02641111135482788,
|
| 699 |
+
"step": 2275
|
| 700 |
+
},
|
| 701 |
+
{
|
| 702 |
+
"epoch": 0.4088162104514753,
|
| 703 |
+
"grad_norm": 0.9879816174507141,
|
| 704 |
+
"learning_rate": 1.2583207261724662e-05,
|
| 705 |
+
"loss": 0.027107694149017335,
|
| 706 |
+
"step": 2300
|
| 707 |
+
},
|
| 708 |
+
{
|
| 709 |
+
"epoch": 0.4132598649129044,
|
| 710 |
+
"grad_norm": 0.5398420095443726,
|
| 711 |
+
"learning_rate": 1.2488653555219366e-05,
|
| 712 |
+
"loss": 0.027692139148712158,
|
| 713 |
+
"step": 2325
|
| 714 |
+
},
|
| 715 |
+
{
|
| 716 |
+
"epoch": 0.4177035193743335,
|
| 717 |
+
"grad_norm": 0.8365870118141174,
|
| 718 |
+
"learning_rate": 1.239409984871407e-05,
|
| 719 |
+
"loss": 0.029139807224273683,
|
| 720 |
+
"step": 2350
|
| 721 |
+
},
|
| 722 |
+
{
|
| 723 |
+
"epoch": 0.42214717383576256,
|
| 724 |
+
"grad_norm": 1.1654356718063354,
|
| 725 |
+
"learning_rate": 1.2299546142208775e-05,
|
| 726 |
+
"loss": 0.025624027252197267,
|
| 727 |
+
"step": 2375
|
| 728 |
+
},
|
| 729 |
+
{
|
| 730 |
+
"epoch": 0.42659082829719164,
|
| 731 |
+
"grad_norm": 0.8927724361419678,
|
| 732 |
+
"learning_rate": 1.220499243570348e-05,
|
| 733 |
+
"loss": 0.032838408946990964,
|
| 734 |
+
"step": 2400
|
| 735 |
+
},
|
| 736 |
+
{
|
| 737 |
+
"epoch": 0.43103448275862066,
|
| 738 |
+
"grad_norm": 1.7535399198532104,
|
| 739 |
+
"learning_rate": 1.2110438729198187e-05,
|
| 740 |
+
"loss": 0.03213581085205078,
|
| 741 |
+
"step": 2425
|
| 742 |
+
},
|
| 743 |
+
{
|
| 744 |
+
"epoch": 0.43547813722004974,
|
| 745 |
+
"grad_norm": 1.506422996520996,
|
| 746 |
+
"learning_rate": 1.201588502269289e-05,
|
| 747 |
+
"loss": 0.031965067386627195,
|
| 748 |
+
"step": 2450
|
| 749 |
+
},
|
| 750 |
+
{
|
| 751 |
+
"epoch": 0.4399217916814788,
|
| 752 |
+
"grad_norm": 1.933950424194336,
|
| 753 |
+
"learning_rate": 1.1921331316187596e-05,
|
| 754 |
+
"loss": 0.02685023784637451,
|
| 755 |
+
"step": 2475
|
| 756 |
+
},
|
| 757 |
+
{
|
| 758 |
+
"epoch": 0.4443654461429079,
|
| 759 |
+
"grad_norm": 0.8511770963668823,
|
| 760 |
+
"learning_rate": 1.18267776096823e-05,
|
| 761 |
+
"loss": 0.03357476472854614,
|
| 762 |
+
"step": 2500
|
| 763 |
+
},
|
| 764 |
+
{
|
| 765 |
+
"epoch": 0.4443654461429079,
|
| 766 |
+
"eval_accuracy": 0.9095,
|
| 767 |
+
"eval_auroc": 0.9952128276617958,
|
| 768 |
+
"eval_f1": 0.9173893199452305,
|
| 769 |
+
"eval_loss": 0.030984506011009216,
|
| 770 |
+
"eval_runtime": 40.8627,
|
| 771 |
+
"eval_samples_per_second": 48.944,
|
| 772 |
+
"eval_steps_per_second": 1.542,
|
| 773 |
+
"eval_tpr_at_fpr1": 0.856858846918489,
|
| 774 |
+
"eval_tpr_at_fpr5": 0.9821073558648111,
|
| 775 |
+
"step": 2500
|
| 776 |
+
},
|
| 777 |
+
{
|
| 778 |
+
"epoch": 0.448809100604337,
|
| 779 |
+
"grad_norm": 1.3411929607391357,
|
| 780 |
+
"learning_rate": 1.1732223903177005e-05,
|
| 781 |
+
"loss": 0.026271984577178956,
|
| 782 |
+
"step": 2525
|
| 783 |
+
},
|
| 784 |
+
{
|
| 785 |
+
"epoch": 0.45325275506576607,
|
| 786 |
+
"grad_norm": 1.3650128841400146,
|
| 787 |
+
"learning_rate": 1.1637670196671711e-05,
|
| 788 |
+
"loss": 0.030547237396240233,
|
| 789 |
+
"step": 2550
|
| 790 |
+
},
|
| 791 |
+
{
|
| 792 |
+
"epoch": 0.45769640952719515,
|
| 793 |
+
"grad_norm": 0.7035048007965088,
|
| 794 |
+
"learning_rate": 1.1543116490166415e-05,
|
| 795 |
+
"loss": 0.026542000770568848,
|
| 796 |
+
"step": 2575
|
| 797 |
+
},
|
| 798 |
+
{
|
| 799 |
+
"epoch": 0.46214006398862423,
|
| 800 |
+
"grad_norm": 1.3388855457305908,
|
| 801 |
+
"learning_rate": 1.1448562783661122e-05,
|
| 802 |
+
"loss": 0.024521036148071287,
|
| 803 |
+
"step": 2600
|
| 804 |
+
},
|
| 805 |
+
{
|
| 806 |
+
"epoch": 0.4665837184500533,
|
| 807 |
+
"grad_norm": 1.0085132122039795,
|
| 808 |
+
"learning_rate": 1.1354009077155826e-05,
|
| 809 |
+
"loss": 0.024952406883239745,
|
| 810 |
+
"step": 2625
|
| 811 |
+
},
|
| 812 |
+
{
|
| 813 |
+
"epoch": 0.4710273729114824,
|
| 814 |
+
"grad_norm": 0.30464261770248413,
|
| 815 |
+
"learning_rate": 1.125945537065053e-05,
|
| 816 |
+
"loss": 0.02288907766342163,
|
| 817 |
+
"step": 2650
|
| 818 |
+
},
|
| 819 |
+
{
|
| 820 |
+
"epoch": 0.47547102737291147,
|
| 821 |
+
"grad_norm": 0.6784248948097229,
|
| 822 |
+
"learning_rate": 1.1164901664145235e-05,
|
| 823 |
+
"loss": 0.024585678577423095,
|
| 824 |
+
"step": 2675
|
| 825 |
+
},
|
| 826 |
+
{
|
| 827 |
+
"epoch": 0.47991468183434055,
|
| 828 |
+
"grad_norm": 1.2737281322479248,
|
| 829 |
+
"learning_rate": 1.107034795763994e-05,
|
| 830 |
+
"loss": 0.028601126670837404,
|
| 831 |
+
"step": 2700
|
| 832 |
+
},
|
| 833 |
+
{
|
| 834 |
+
"epoch": 0.48435833629576963,
|
| 835 |
+
"grad_norm": 1.2060391902923584,
|
| 836 |
+
"learning_rate": 1.0975794251134646e-05,
|
| 837 |
+
"loss": 0.03009690284729004,
|
| 838 |
+
"step": 2725
|
| 839 |
+
},
|
| 840 |
+
{
|
| 841 |
+
"epoch": 0.4888019907571987,
|
| 842 |
+
"grad_norm": 0.9331129789352417,
|
| 843 |
+
"learning_rate": 1.088124054462935e-05,
|
| 844 |
+
"loss": 0.024897255897521973,
|
| 845 |
+
"step": 2750
|
| 846 |
+
},
|
| 847 |
+
{
|
| 848 |
+
"epoch": 0.4932456452186278,
|
| 849 |
+
"grad_norm": 0.7035834789276123,
|
| 850 |
+
"learning_rate": 1.0786686838124056e-05,
|
| 851 |
+
"loss": 0.029437661170959473,
|
| 852 |
+
"step": 2775
|
| 853 |
+
},
|
| 854 |
+
{
|
| 855 |
+
"epoch": 0.4976892996800569,
|
| 856 |
+
"grad_norm": 1.3447843790054321,
|
| 857 |
+
"learning_rate": 1.069213313161876e-05,
|
| 858 |
+
"loss": 0.024274458885192873,
|
| 859 |
+
"step": 2800
|
| 860 |
+
},
|
| 861 |
+
{
|
| 862 |
+
"epoch": 0.502132954141486,
|
| 863 |
+
"grad_norm": 0.7223392724990845,
|
| 864 |
+
"learning_rate": 1.0597579425113464e-05,
|
| 865 |
+
"loss": 0.029445352554321288,
|
| 866 |
+
"step": 2825
|
| 867 |
+
},
|
| 868 |
+
{
|
| 869 |
+
"epoch": 0.5065766086029151,
|
| 870 |
+
"grad_norm": 1.4334781169891357,
|
| 871 |
+
"learning_rate": 1.0503025718608171e-05,
|
| 872 |
+
"loss": 0.0277593731880188,
|
| 873 |
+
"step": 2850
|
| 874 |
+
},
|
| 875 |
+
{
|
| 876 |
+
"epoch": 0.5110202630643441,
|
| 877 |
+
"grad_norm": 1.4802097082138062,
|
| 878 |
+
"learning_rate": 1.0408472012102875e-05,
|
| 879 |
+
"loss": 0.029638910293579103,
|
| 880 |
+
"step": 2875
|
| 881 |
+
},
|
| 882 |
+
{
|
| 883 |
+
"epoch": 0.5154639175257731,
|
| 884 |
+
"grad_norm": 1.0358779430389404,
|
| 885 |
+
"learning_rate": 1.031391830559758e-05,
|
| 886 |
+
"loss": 0.021964311599731445,
|
| 887 |
+
"step": 2900
|
| 888 |
+
},
|
| 889 |
+
{
|
| 890 |
+
"epoch": 0.5199075719872023,
|
| 891 |
+
"grad_norm": 0.5717483758926392,
|
| 892 |
+
"learning_rate": 1.0219364599092286e-05,
|
| 893 |
+
"loss": 0.02695397138595581,
|
| 894 |
+
"step": 2925
|
| 895 |
+
},
|
| 896 |
+
{
|
| 897 |
+
"epoch": 0.5243512264486313,
|
| 898 |
+
"grad_norm": 0.5034074783325195,
|
| 899 |
+
"learning_rate": 1.012481089258699e-05,
|
| 900 |
+
"loss": 0.02091419219970703,
|
| 901 |
+
"step": 2950
|
| 902 |
+
},
|
| 903 |
+
{
|
| 904 |
+
"epoch": 0.5287948809100604,
|
| 905 |
+
"grad_norm": 1.093700647354126,
|
| 906 |
+
"learning_rate": 1.0030257186081695e-05,
|
| 907 |
+
"loss": 0.018702698945999144,
|
| 908 |
+
"step": 2975
|
| 909 |
+
},
|
| 910 |
+
{
|
| 911 |
+
"epoch": 0.5332385353714895,
|
| 912 |
+
"grad_norm": 0.5766699910163879,
|
| 913 |
+
"learning_rate": 9.935703479576401e-06,
|
| 914 |
+
"loss": 0.02352435827255249,
|
| 915 |
+
"step": 3000
|
| 916 |
+
},
|
| 917 |
+
{
|
| 918 |
+
"epoch": 0.5332385353714895,
|
| 919 |
+
"eval_accuracy": 0.903,
|
| 920 |
+
"eval_auroc": 0.992703737334544,
|
| 921 |
+
"eval_f1": 0.9120580235720762,
|
| 922 |
+
"eval_loss": 0.036512941122055054,
|
| 923 |
+
"eval_runtime": 40.4309,
|
| 924 |
+
"eval_samples_per_second": 49.467,
|
| 925 |
+
"eval_steps_per_second": 1.558,
|
| 926 |
+
"eval_tpr_at_fpr1": 0.852882703777336,
|
| 927 |
+
"eval_tpr_at_fpr5": 0.9582504970178927,
|
| 928 |
+
"step": 3000
|
| 929 |
+
},
|
| 930 |
+
{
|
| 931 |
+
"epoch": 0.5376821898329186,
|
| 932 |
+
"grad_norm": 1.6101889610290527,
|
| 933 |
+
"learning_rate": 9.841149773071105e-06,
|
| 934 |
+
"loss": 0.02831456422805786,
|
| 935 |
+
"step": 3025
|
| 936 |
+
},
|
| 937 |
+
{
|
| 938 |
+
"epoch": 0.5421258442943476,
|
| 939 |
+
"grad_norm": 1.7061760425567627,
|
| 940 |
+
"learning_rate": 9.74659606656581e-06,
|
| 941 |
+
"loss": 0.023692820072174072,
|
| 942 |
+
"step": 3050
|
| 943 |
+
},
|
| 944 |
+
{
|
| 945 |
+
"epoch": 0.5465694987557768,
|
| 946 |
+
"grad_norm": 1.293489933013916,
|
| 947 |
+
"learning_rate": 9.652042360060516e-06,
|
| 948 |
+
"loss": 0.021458499431610108,
|
| 949 |
+
"step": 3075
|
| 950 |
+
},
|
| 951 |
+
{
|
| 952 |
+
"epoch": 0.5510131532172058,
|
| 953 |
+
"grad_norm": 1.280171513557434,
|
| 954 |
+
"learning_rate": 9.55748865355522e-06,
|
| 955 |
+
"loss": 0.023303213119506835,
|
| 956 |
+
"step": 3100
|
| 957 |
+
},
|
| 958 |
+
{
|
| 959 |
+
"epoch": 0.5554568076786349,
|
| 960 |
+
"grad_norm": 1.2874751091003418,
|
| 961 |
+
"learning_rate": 9.462934947049925e-06,
|
| 962 |
+
"loss": 0.027433459758758546,
|
| 963 |
+
"step": 3125
|
| 964 |
+
},
|
| 965 |
+
{
|
| 966 |
+
"epoch": 0.559900462140064,
|
| 967 |
+
"grad_norm": 1.2265180349349976,
|
| 968 |
+
"learning_rate": 9.36838124054463e-06,
|
| 969 |
+
"loss": 0.02306551456451416,
|
| 970 |
+
"step": 3150
|
| 971 |
+
},
|
| 972 |
+
{
|
| 973 |
+
"epoch": 0.5643441166014931,
|
| 974 |
+
"grad_norm": 2.207395076751709,
|
| 975 |
+
"learning_rate": 9.273827534039335e-06,
|
| 976 |
+
"loss": 0.030330984592437743,
|
| 977 |
+
"step": 3175
|
| 978 |
+
},
|
| 979 |
+
{
|
| 980 |
+
"epoch": 0.5687877710629221,
|
| 981 |
+
"grad_norm": 0.700985312461853,
|
| 982 |
+
"learning_rate": 9.17927382753404e-06,
|
| 983 |
+
"loss": 0.02533963918685913,
|
| 984 |
+
"step": 3200
|
| 985 |
+
},
|
| 986 |
+
{
|
| 987 |
+
"epoch": 0.5732314255243512,
|
| 988 |
+
"grad_norm": 0.8443852663040161,
|
| 989 |
+
"learning_rate": 9.084720121028746e-06,
|
| 990 |
+
"loss": 0.029710006713867188,
|
| 991 |
+
"step": 3225
|
| 992 |
+
},
|
| 993 |
+
{
|
| 994 |
+
"epoch": 0.5776750799857803,
|
| 995 |
+
"grad_norm": 0.5237564444541931,
|
| 996 |
+
"learning_rate": 8.99016641452345e-06,
|
| 997 |
+
"loss": 0.02827130079269409,
|
| 998 |
+
"step": 3250
|
| 999 |
+
},
|
| 1000 |
+
{
|
| 1001 |
+
"epoch": 0.5821187344472094,
|
| 1002 |
+
"grad_norm": 1.318710446357727,
|
| 1003 |
+
"learning_rate": 8.895612708018155e-06,
|
| 1004 |
+
"loss": 0.017649848461151123,
|
| 1005 |
+
"step": 3275
|
| 1006 |
+
},
|
| 1007 |
+
{
|
| 1008 |
+
"epoch": 0.5865623889086384,
|
| 1009 |
+
"grad_norm": 2.1418726444244385,
|
| 1010 |
+
"learning_rate": 8.80105900151286e-06,
|
| 1011 |
+
"loss": 0.028039700984954834,
|
| 1012 |
+
"step": 3300
|
| 1013 |
+
},
|
| 1014 |
+
{
|
| 1015 |
+
"epoch": 0.5910060433700676,
|
| 1016 |
+
"grad_norm": 0.6394239068031311,
|
| 1017 |
+
"learning_rate": 8.706505295007565e-06,
|
| 1018 |
+
"loss": 0.029724645614624023,
|
| 1019 |
+
"step": 3325
|
| 1020 |
+
},
|
| 1021 |
+
{
|
| 1022 |
+
"epoch": 0.5954496978314966,
|
| 1023 |
+
"grad_norm": 0.44896772503852844,
|
| 1024 |
+
"learning_rate": 8.61195158850227e-06,
|
| 1025 |
+
"loss": 0.026093797683715822,
|
| 1026 |
+
"step": 3350
|
| 1027 |
+
},
|
| 1028 |
+
{
|
| 1029 |
+
"epoch": 0.5998933522929257,
|
| 1030 |
+
"grad_norm": 2.3762757778167725,
|
| 1031 |
+
"learning_rate": 8.517397881996974e-06,
|
| 1032 |
+
"loss": 0.027712843418121337,
|
| 1033 |
+
"step": 3375
|
| 1034 |
+
},
|
| 1035 |
+
{
|
| 1036 |
+
"epoch": 0.6043370067543548,
|
| 1037 |
+
"grad_norm": 1.4584051370620728,
|
| 1038 |
+
"learning_rate": 8.42284417549168e-06,
|
| 1039 |
+
"loss": 0.031196737289428712,
|
| 1040 |
+
"step": 3400
|
| 1041 |
+
},
|
| 1042 |
+
{
|
| 1043 |
+
"epoch": 0.6087806612157839,
|
| 1044 |
+
"grad_norm": 2.2569475173950195,
|
| 1045 |
+
"learning_rate": 8.328290468986385e-06,
|
| 1046 |
+
"loss": 0.026621932983398437,
|
| 1047 |
+
"step": 3425
|
| 1048 |
+
},
|
| 1049 |
+
{
|
| 1050 |
+
"epoch": 0.6132243156772129,
|
| 1051 |
+
"grad_norm": 1.9737194776535034,
|
| 1052 |
+
"learning_rate": 8.23373676248109e-06,
|
| 1053 |
+
"loss": 0.020163617134094237,
|
| 1054 |
+
"step": 3450
|
| 1055 |
+
},
|
| 1056 |
+
{
|
| 1057 |
+
"epoch": 0.617667970138642,
|
| 1058 |
+
"grad_norm": 0.9083975553512573,
|
| 1059 |
+
"learning_rate": 8.139183055975795e-06,
|
| 1060 |
+
"loss": 0.0209149169921875,
|
| 1061 |
+
"step": 3475
|
| 1062 |
+
},
|
| 1063 |
+
{
|
| 1064 |
+
"epoch": 0.6221116246000711,
|
| 1065 |
+
"grad_norm": 1.0563571453094482,
|
| 1066 |
+
"learning_rate": 8.0446293494705e-06,
|
| 1067 |
+
"loss": 0.025532805919647218,
|
| 1068 |
+
"step": 3500
|
| 1069 |
+
},
|
| 1070 |
+
{
|
| 1071 |
+
"epoch": 0.6221116246000711,
|
| 1072 |
+
"eval_accuracy": 0.8925,
|
| 1073 |
+
"eval_auroc": 0.9965268749674989,
|
| 1074 |
+
"eval_f1": 0.9033707865168539,
|
| 1075 |
+
"eval_loss": 0.029821457341313362,
|
| 1076 |
+
"eval_runtime": 40.7571,
|
| 1077 |
+
"eval_samples_per_second": 49.071,
|
| 1078 |
+
"eval_steps_per_second": 1.546,
|
| 1079 |
+
"eval_tpr_at_fpr1": 0.9254473161033797,
|
| 1080 |
+
"eval_tpr_at_fpr5": 0.9850894632206759,
|
| 1081 |
+
"step": 3500
|
| 1082 |
+
},
|
| 1083 |
+
{
|
| 1084 |
+
"epoch": 0.6265552790615002,
|
| 1085 |
+
"grad_norm": 1.6384830474853516,
|
| 1086 |
+
"learning_rate": 7.950075642965204e-06,
|
| 1087 |
+
"loss": 0.018657710552215576,
|
| 1088 |
+
"step": 3525
|
| 1089 |
+
},
|
| 1090 |
+
{
|
| 1091 |
+
"epoch": 0.6309989335229292,
|
| 1092 |
+
"grad_norm": 1.4129881858825684,
|
| 1093 |
+
"learning_rate": 7.85552193645991e-06,
|
| 1094 |
+
"loss": 0.027512576580047608,
|
| 1095 |
+
"step": 3550
|
| 1096 |
+
},
|
| 1097 |
+
{
|
| 1098 |
+
"epoch": 0.6354425879843584,
|
| 1099 |
+
"grad_norm": 1.2471665143966675,
|
| 1100 |
+
"learning_rate": 7.760968229954615e-06,
|
| 1101 |
+
"loss": 0.029382569789886473,
|
| 1102 |
+
"step": 3575
|
| 1103 |
+
},
|
| 1104 |
+
{
|
| 1105 |
+
"epoch": 0.6398862424457874,
|
| 1106 |
+
"grad_norm": 1.1254513263702393,
|
| 1107 |
+
"learning_rate": 7.66641452344932e-06,
|
| 1108 |
+
"loss": 0.023775274753570556,
|
| 1109 |
+
"step": 3600
|
| 1110 |
+
},
|
| 1111 |
+
{
|
| 1112 |
+
"epoch": 0.6443298969072165,
|
| 1113 |
+
"grad_norm": 0.9185925126075745,
|
| 1114 |
+
"learning_rate": 7.571860816944025e-06,
|
| 1115 |
+
"loss": 0.025625219345092775,
|
| 1116 |
+
"step": 3625
|
| 1117 |
+
},
|
| 1118 |
+
{
|
| 1119 |
+
"epoch": 0.6487735513686456,
|
| 1120 |
+
"grad_norm": 0.9741719961166382,
|
| 1121 |
+
"learning_rate": 7.477307110438729e-06,
|
| 1122 |
+
"loss": 0.014400173425674439,
|
| 1123 |
+
"step": 3650
|
| 1124 |
+
},
|
| 1125 |
+
{
|
| 1126 |
+
"epoch": 0.6532172058300747,
|
| 1127 |
+
"grad_norm": 1.5722410678863525,
|
| 1128 |
+
"learning_rate": 7.382753403933435e-06,
|
| 1129 |
+
"loss": 0.016961036920547484,
|
| 1130 |
+
"step": 3675
|
| 1131 |
+
},
|
| 1132 |
+
{
|
| 1133 |
+
"epoch": 0.6576608602915037,
|
| 1134 |
+
"grad_norm": 1.0956284999847412,
|
| 1135 |
+
"learning_rate": 7.28819969742814e-06,
|
| 1136 |
+
"loss": 0.029399728775024413,
|
| 1137 |
+
"step": 3700
|
| 1138 |
+
},
|
| 1139 |
+
{
|
| 1140 |
+
"epoch": 0.6621045147529329,
|
| 1141 |
+
"grad_norm": 1.8072013854980469,
|
| 1142 |
+
"learning_rate": 7.193645990922845e-06,
|
| 1143 |
+
"loss": 0.02603915214538574,
|
| 1144 |
+
"step": 3725
|
| 1145 |
+
},
|
| 1146 |
+
{
|
| 1147 |
+
"epoch": 0.6665481692143619,
|
| 1148 |
+
"grad_norm": 1.4998871088027954,
|
| 1149 |
+
"learning_rate": 7.09909228441755e-06,
|
| 1150 |
+
"loss": 0.018154734373092653,
|
| 1151 |
+
"step": 3750
|
| 1152 |
+
},
|
| 1153 |
+
{
|
| 1154 |
+
"epoch": 0.670991823675791,
|
| 1155 |
+
"grad_norm": 1.015345573425293,
|
| 1156 |
+
"learning_rate": 7.004538577912255e-06,
|
| 1157 |
+
"loss": 0.020612461566925047,
|
| 1158 |
+
"step": 3775
|
| 1159 |
+
},
|
| 1160 |
+
{
|
| 1161 |
+
"epoch": 0.67543547813722,
|
| 1162 |
+
"grad_norm": 0.518636167049408,
|
| 1163 |
+
"learning_rate": 6.909984871406959e-06,
|
| 1164 |
+
"loss": 0.020666675567626955,
|
| 1165 |
+
"step": 3800
|
| 1166 |
+
},
|
| 1167 |
+
{
|
| 1168 |
+
"epoch": 0.6798791325986492,
|
| 1169 |
+
"grad_norm": 1.4760479927062988,
|
| 1170 |
+
"learning_rate": 6.815431164901665e-06,
|
| 1171 |
+
"loss": 0.022126734256744385,
|
| 1172 |
+
"step": 3825
|
| 1173 |
+
},
|
| 1174 |
+
{
|
| 1175 |
+
"epoch": 0.6843227870600782,
|
| 1176 |
+
"grad_norm": 0.5096405744552612,
|
| 1177 |
+
"learning_rate": 6.7208774583963696e-06,
|
| 1178 |
+
"loss": 0.023763720989227296,
|
| 1179 |
+
"step": 3850
|
| 1180 |
+
},
|
| 1181 |
+
{
|
| 1182 |
+
"epoch": 0.6887664415215072,
|
| 1183 |
+
"grad_norm": 0.7516443133354187,
|
| 1184 |
+
"learning_rate": 6.626323751891075e-06,
|
| 1185 |
+
"loss": 0.023717043399810792,
|
| 1186 |
+
"step": 3875
|
| 1187 |
+
},
|
| 1188 |
+
{
|
| 1189 |
+
"epoch": 0.6932100959829364,
|
| 1190 |
+
"grad_norm": 0.8385019898414612,
|
| 1191 |
+
"learning_rate": 6.53177004538578e-06,
|
| 1192 |
+
"loss": 0.021878042221069337,
|
| 1193 |
+
"step": 3900
|
| 1194 |
+
},
|
| 1195 |
+
{
|
| 1196 |
+
"epoch": 0.6976537504443654,
|
| 1197 |
+
"grad_norm": 1.1693350076675415,
|
| 1198 |
+
"learning_rate": 6.4372163388804845e-06,
|
| 1199 |
+
"loss": 0.014371514320373535,
|
| 1200 |
+
"step": 3925
|
| 1201 |
+
},
|
| 1202 |
+
{
|
| 1203 |
+
"epoch": 0.7020974049057945,
|
| 1204 |
+
"grad_norm": 1.4496546983718872,
|
| 1205 |
+
"learning_rate": 6.342662632375189e-06,
|
| 1206 |
+
"loss": 0.021327991485595704,
|
| 1207 |
+
"step": 3950
|
| 1208 |
+
},
|
| 1209 |
+
{
|
| 1210 |
+
"epoch": 0.7065410593672236,
|
| 1211 |
+
"grad_norm": 1.0142734050750732,
|
| 1212 |
+
"learning_rate": 6.248108925869895e-06,
|
| 1213 |
+
"loss": 0.023486480712890626,
|
| 1214 |
+
"step": 3975
|
| 1215 |
+
},
|
| 1216 |
+
{
|
| 1217 |
+
"epoch": 0.7109847138286527,
|
| 1218 |
+
"grad_norm": 0.4203350245952606,
|
| 1219 |
+
"learning_rate": 6.1535552193645995e-06,
|
| 1220 |
+
"loss": 0.023264715671539305,
|
| 1221 |
+
"step": 4000
|
| 1222 |
+
},
|
| 1223 |
+
{
|
| 1224 |
+
"epoch": 0.7109847138286527,
|
| 1225 |
+
"eval_accuracy": 0.9335,
|
| 1226 |
+
"eval_auroc": 0.9970688944802013,
|
| 1227 |
+
"eval_f1": 0.9379374708352777,
|
| 1228 |
+
"eval_loss": 0.02452407218515873,
|
| 1229 |
+
"eval_runtime": 40.1446,
|
| 1230 |
+
"eval_samples_per_second": 49.82,
|
| 1231 |
+
"eval_steps_per_second": 1.569,
|
| 1232 |
+
"eval_tpr_at_fpr1": 0.9264413518886679,
|
| 1233 |
+
"eval_tpr_at_fpr5": 0.9850894632206759,
|
| 1234 |
+
"step": 4000
|
| 1235 |
+
},
|
| 1236 |
+
{
|
| 1237 |
+
"epoch": 0.7154283682900817,
|
| 1238 |
+
"grad_norm": 0.6930143237113953,
|
| 1239 |
+
"learning_rate": 6.059001512859305e-06,
|
| 1240 |
+
"loss": 0.01856299042701721,
|
| 1241 |
+
"step": 4025
|
| 1242 |
+
},
|
| 1243 |
+
{
|
| 1244 |
+
"epoch": 0.7198720227515109,
|
| 1245 |
+
"grad_norm": 1.0674962997436523,
|
| 1246 |
+
"learning_rate": 5.96444780635401e-06,
|
| 1247 |
+
"loss": 0.023626606464385986,
|
| 1248 |
+
"step": 4050
|
| 1249 |
+
},
|
| 1250 |
+
{
|
| 1251 |
+
"epoch": 0.7243156772129399,
|
| 1252 |
+
"grad_norm": 0.6356366276741028,
|
| 1253 |
+
"learning_rate": 5.8698940998487145e-06,
|
| 1254 |
+
"loss": 0.023326983451843263,
|
| 1255 |
+
"step": 4075
|
| 1256 |
+
},
|
| 1257 |
+
{
|
| 1258 |
+
"epoch": 0.728759331674369,
|
| 1259 |
+
"grad_norm": 0.8227376937866211,
|
| 1260 |
+
"learning_rate": 5.775340393343419e-06,
|
| 1261 |
+
"loss": 0.02331566095352173,
|
| 1262 |
+
"step": 4100
|
| 1263 |
+
},
|
| 1264 |
+
{
|
| 1265 |
+
"epoch": 0.733202986135798,
|
| 1266 |
+
"grad_norm": 2.189657211303711,
|
| 1267 |
+
"learning_rate": 5.680786686838125e-06,
|
| 1268 |
+
"loss": 0.01987994074821472,
|
| 1269 |
+
"step": 4125
|
| 1270 |
+
},
|
| 1271 |
+
{
|
| 1272 |
+
"epoch": 0.7376466405972272,
|
| 1273 |
+
"grad_norm": 0.46455055475234985,
|
| 1274 |
+
"learning_rate": 5.5862329803328295e-06,
|
| 1275 |
+
"loss": 0.01780161142349243,
|
| 1276 |
+
"step": 4150
|
| 1277 |
+
},
|
| 1278 |
+
{
|
| 1279 |
+
"epoch": 0.7420902950586562,
|
| 1280 |
+
"grad_norm": 0.7525627017021179,
|
| 1281 |
+
"learning_rate": 5.491679273827535e-06,
|
| 1282 |
+
"loss": 0.02872683048248291,
|
| 1283 |
+
"step": 4175
|
| 1284 |
+
},
|
| 1285 |
+
{
|
| 1286 |
+
"epoch": 0.7465339495200853,
|
| 1287 |
+
"grad_norm": 0.9939025640487671,
|
| 1288 |
+
"learning_rate": 5.39712556732224e-06,
|
| 1289 |
+
"loss": 0.021651785373687744,
|
| 1290 |
+
"step": 4200
|
| 1291 |
+
},
|
| 1292 |
+
{
|
| 1293 |
+
"epoch": 0.7509776039815144,
|
| 1294 |
+
"grad_norm": 0.5748035907745361,
|
| 1295 |
+
"learning_rate": 5.3025718608169445e-06,
|
| 1296 |
+
"loss": 0.01857919096946716,
|
| 1297 |
+
"step": 4225
|
| 1298 |
+
},
|
| 1299 |
+
{
|
| 1300 |
+
"epoch": 0.7554212584429435,
|
| 1301 |
+
"grad_norm": 1.1377756595611572,
|
| 1302 |
+
"learning_rate": 5.208018154311649e-06,
|
| 1303 |
+
"loss": 0.021290059089660644,
|
| 1304 |
+
"step": 4250
|
| 1305 |
+
},
|
| 1306 |
+
{
|
| 1307 |
+
"epoch": 0.7598649129043725,
|
| 1308 |
+
"grad_norm": 1.592410683631897,
|
| 1309 |
+
"learning_rate": 5.113464447806355e-06,
|
| 1310 |
+
"loss": 0.01949896812438965,
|
| 1311 |
+
"step": 4275
|
| 1312 |
+
},
|
| 1313 |
+
{
|
| 1314 |
+
"epoch": 0.7643085673658017,
|
| 1315 |
+
"grad_norm": 1.3217352628707886,
|
| 1316 |
+
"learning_rate": 5.0189107413010595e-06,
|
| 1317 |
+
"loss": 0.024791300296783447,
|
| 1318 |
+
"step": 4300
|
| 1319 |
+
},
|
| 1320 |
+
{
|
| 1321 |
+
"epoch": 0.7687522218272307,
|
| 1322 |
+
"grad_norm": 0.3922988474369049,
|
| 1323 |
+
"learning_rate": 4.924357034795764e-06,
|
| 1324 |
+
"loss": 0.021808433532714843,
|
| 1325 |
+
"step": 4325
|
| 1326 |
+
},
|
| 1327 |
+
{
|
| 1328 |
+
"epoch": 0.7731958762886598,
|
| 1329 |
+
"grad_norm": 0.45381656289100647,
|
| 1330 |
+
"learning_rate": 4.82980332829047e-06,
|
| 1331 |
+
"loss": 0.020455398559570313,
|
| 1332 |
+
"step": 4350
|
| 1333 |
+
},
|
| 1334 |
+
{
|
| 1335 |
+
"epoch": 0.7776395307500888,
|
| 1336 |
+
"grad_norm": 0.8540909886360168,
|
| 1337 |
+
"learning_rate": 4.7352496217851745e-06,
|
| 1338 |
+
"loss": 0.023225700855255126,
|
| 1339 |
+
"step": 4375
|
| 1340 |
+
},
|
| 1341 |
+
{
|
| 1342 |
+
"epoch": 0.782083185211518,
|
| 1343 |
+
"grad_norm": 2.9069783687591553,
|
| 1344 |
+
"learning_rate": 4.640695915279879e-06,
|
| 1345 |
+
"loss": 0.023292510509490966,
|
| 1346 |
+
"step": 4400
|
| 1347 |
+
},
|
| 1348 |
+
{
|
| 1349 |
+
"epoch": 0.786526839672947,
|
| 1350 |
+
"grad_norm": 0.9787670969963074,
|
| 1351 |
+
"learning_rate": 4.546142208774585e-06,
|
| 1352 |
+
"loss": 0.024890389442443848,
|
| 1353 |
+
"step": 4425
|
| 1354 |
+
},
|
| 1355 |
+
{
|
| 1356 |
+
"epoch": 0.7909704941343761,
|
| 1357 |
+
"grad_norm": 1.0303661823272705,
|
| 1358 |
+
"learning_rate": 4.4515885022692894e-06,
|
| 1359 |
+
"loss": 0.023072149753570557,
|
| 1360 |
+
"step": 4450
|
| 1361 |
+
},
|
| 1362 |
+
{
|
| 1363 |
+
"epoch": 0.7954141485958052,
|
| 1364 |
+
"grad_norm": 2.1931862831115723,
|
| 1365 |
+
"learning_rate": 4.357034795763994e-06,
|
| 1366 |
+
"loss": 0.032431015968322756,
|
| 1367 |
+
"step": 4475
|
| 1368 |
+
},
|
| 1369 |
+
{
|
| 1370 |
+
"epoch": 0.7998578030572343,
|
| 1371 |
+
"grad_norm": 0.7739485502243042,
|
| 1372 |
+
"learning_rate": 4.2624810892587e-06,
|
| 1373 |
+
"loss": 0.020095715522766112,
|
| 1374 |
+
"step": 4500
|
| 1375 |
+
},
|
| 1376 |
+
{
|
| 1377 |
+
"epoch": 0.7998578030572343,
|
| 1378 |
+
"eval_accuracy": 0.9275,
|
| 1379 |
+
"eval_auroc": 0.9970798948762156,
|
| 1380 |
+
"eval_f1": 0.9327146171693736,
|
| 1381 |
+
"eval_loss": 0.024173183366656303,
|
| 1382 |
+
"eval_runtime": 39.1522,
|
| 1383 |
+
"eval_samples_per_second": 51.083,
|
| 1384 |
+
"eval_steps_per_second": 1.609,
|
| 1385 |
+
"eval_tpr_at_fpr1": 0.9224652087475149,
|
| 1386 |
+
"eval_tpr_at_fpr5": 0.9900596421471173,
|
| 1387 |
+
"step": 4500
|
| 1388 |
+
},
|
| 1389 |
+
{
|
| 1390 |
+
"epoch": 0.8043014575186633,
|
| 1391 |
+
"grad_norm": 1.5598735809326172,
|
| 1392 |
+
"learning_rate": 4.167927382753404e-06,
|
| 1393 |
+
"loss": 0.024987099170684816,
|
| 1394 |
+
"step": 4525
|
| 1395 |
+
},
|
| 1396 |
+
{
|
| 1397 |
+
"epoch": 0.8087451119800925,
|
| 1398 |
+
"grad_norm": 1.1426900625228882,
|
| 1399 |
+
"learning_rate": 4.073373676248109e-06,
|
| 1400 |
+
"loss": 0.01934351325035095,
|
| 1401 |
+
"step": 4550
|
| 1402 |
+
},
|
| 1403 |
+
{
|
| 1404 |
+
"epoch": 0.8131887664415215,
|
| 1405 |
+
"grad_norm": 0.3795163333415985,
|
| 1406 |
+
"learning_rate": 3.978819969742814e-06,
|
| 1407 |
+
"loss": 0.020940425395965575,
|
| 1408 |
+
"step": 4575
|
| 1409 |
+
},
|
| 1410 |
+
{
|
| 1411 |
+
"epoch": 0.8176324209029506,
|
| 1412 |
+
"grad_norm": 1.1596218347549438,
|
| 1413 |
+
"learning_rate": 3.884266263237519e-06,
|
| 1414 |
+
"loss": 0.027658913135528564,
|
| 1415 |
+
"step": 4600
|
| 1416 |
+
},
|
| 1417 |
+
{
|
| 1418 |
+
"epoch": 0.8220760753643797,
|
| 1419 |
+
"grad_norm": 1.05118989944458,
|
| 1420 |
+
"learning_rate": 3.789712556732224e-06,
|
| 1421 |
+
"loss": 0.016726157665252685,
|
| 1422 |
+
"step": 4625
|
| 1423 |
+
},
|
| 1424 |
+
{
|
| 1425 |
+
"epoch": 0.8265197298258088,
|
| 1426 |
+
"grad_norm": 0.994926393032074,
|
| 1427 |
+
"learning_rate": 3.6951588502269293e-06,
|
| 1428 |
+
"loss": 0.013396300077438354,
|
| 1429 |
+
"step": 4650
|
| 1430 |
+
},
|
| 1431 |
+
{
|
| 1432 |
+
"epoch": 0.8309633842872378,
|
| 1433 |
+
"grad_norm": 2.416964054107666,
|
| 1434 |
+
"learning_rate": 3.6006051437216344e-06,
|
| 1435 |
+
"loss": 0.02318723201751709,
|
| 1436 |
+
"step": 4675
|
| 1437 |
+
},
|
| 1438 |
+
{
|
| 1439 |
+
"epoch": 0.835407038748667,
|
| 1440 |
+
"grad_norm": 2.359633445739746,
|
| 1441 |
+
"learning_rate": 3.506051437216339e-06,
|
| 1442 |
+
"loss": 0.02345597982406616,
|
| 1443 |
+
"step": 4700
|
| 1444 |
+
},
|
| 1445 |
+
{
|
| 1446 |
+
"epoch": 0.839850693210096,
|
| 1447 |
+
"grad_norm": 1.0586191415786743,
|
| 1448 |
+
"learning_rate": 3.4114977307110442e-06,
|
| 1449 |
+
"loss": 0.021095492839813233,
|
| 1450 |
+
"step": 4725
|
| 1451 |
+
},
|
| 1452 |
+
{
|
| 1453 |
+
"epoch": 0.8442943476715251,
|
| 1454 |
+
"grad_norm": 1.2005938291549683,
|
| 1455 |
+
"learning_rate": 3.3169440242057494e-06,
|
| 1456 |
+
"loss": 0.023975539207458495,
|
| 1457 |
+
"step": 4750
|
| 1458 |
+
},
|
| 1459 |
+
{
|
| 1460 |
+
"epoch": 0.8487380021329541,
|
| 1461 |
+
"grad_norm": 0.22911959886550903,
|
| 1462 |
+
"learning_rate": 3.222390317700454e-06,
|
| 1463 |
+
"loss": 0.019334245920181275,
|
| 1464 |
+
"step": 4775
|
| 1465 |
+
},
|
| 1466 |
+
{
|
| 1467 |
+
"epoch": 0.8531816565943833,
|
| 1468 |
+
"grad_norm": 1.3965319395065308,
|
| 1469 |
+
"learning_rate": 3.1278366111951592e-06,
|
| 1470 |
+
"loss": 0.027639262676239014,
|
| 1471 |
+
"step": 4800
|
| 1472 |
+
},
|
| 1473 |
+
{
|
| 1474 |
+
"epoch": 0.8576253110558123,
|
| 1475 |
+
"grad_norm": 0.15118920803070068,
|
| 1476 |
+
"learning_rate": 3.0332829046898644e-06,
|
| 1477 |
+
"loss": 0.020000927448272705,
|
| 1478 |
+
"step": 4825
|
| 1479 |
+
},
|
| 1480 |
+
{
|
| 1481 |
+
"epoch": 0.8620689655172413,
|
| 1482 |
+
"grad_norm": 1.7333295345306396,
|
| 1483 |
+
"learning_rate": 2.938729198184569e-06,
|
| 1484 |
+
"loss": 0.02153873920440674,
|
| 1485 |
+
"step": 4850
|
| 1486 |
+
},
|
| 1487 |
+
{
|
| 1488 |
+
"epoch": 0.8665126199786705,
|
| 1489 |
+
"grad_norm": 0.2823106348514557,
|
| 1490 |
+
"learning_rate": 2.844175491679274e-06,
|
| 1491 |
+
"loss": 0.014718363285064697,
|
| 1492 |
+
"step": 4875
|
| 1493 |
+
},
|
| 1494 |
+
{
|
| 1495 |
+
"epoch": 0.8709562744400995,
|
| 1496 |
+
"grad_norm": 0.735140323638916,
|
| 1497 |
+
"learning_rate": 2.7496217851739793e-06,
|
| 1498 |
+
"loss": 0.01869586229324341,
|
| 1499 |
+
"step": 4900
|
| 1500 |
+
},
|
| 1501 |
+
{
|
| 1502 |
+
"epoch": 0.8753999289015286,
|
| 1503 |
+
"grad_norm": 0.4756013751029968,
|
| 1504 |
+
"learning_rate": 2.655068078668684e-06,
|
| 1505 |
+
"loss": 0.01766459345817566,
|
| 1506 |
+
"step": 4925
|
| 1507 |
+
},
|
| 1508 |
+
{
|
| 1509 |
+
"epoch": 0.8798435833629576,
|
| 1510 |
+
"grad_norm": 1.9793126583099365,
|
| 1511 |
+
"learning_rate": 2.560514372163389e-06,
|
| 1512 |
+
"loss": 0.01844774007797241,
|
| 1513 |
+
"step": 4950
|
| 1514 |
+
},
|
| 1515 |
+
{
|
| 1516 |
+
"epoch": 0.8842872378243868,
|
| 1517 |
+
"grad_norm": 0.3258880376815796,
|
| 1518 |
+
"learning_rate": 2.465960665658094e-06,
|
| 1519 |
+
"loss": 0.01422677755355835,
|
| 1520 |
+
"step": 4975
|
| 1521 |
+
},
|
| 1522 |
+
{
|
| 1523 |
+
"epoch": 0.8887308922858158,
|
| 1524 |
+
"grad_norm": 0.9487712383270264,
|
| 1525 |
+
"learning_rate": 2.371406959152799e-06,
|
| 1526 |
+
"loss": 0.016807562112808226,
|
| 1527 |
+
"step": 5000
|
| 1528 |
+
},
|
| 1529 |
+
{
|
| 1530 |
+
"epoch": 0.8887308922858158,
|
| 1531 |
+
"eval_accuracy": 0.9295,
|
| 1532 |
+
"eval_auroc": 0.9969978919241093,
|
| 1533 |
+
"eval_f1": 0.9344490934449095,
|
| 1534 |
+
"eval_loss": 0.024602515622973442,
|
| 1535 |
+
"eval_runtime": 39.8771,
|
| 1536 |
+
"eval_samples_per_second": 50.154,
|
| 1537 |
+
"eval_steps_per_second": 1.58,
|
| 1538 |
+
"eval_tpr_at_fpr1": 0.889662027833002,
|
| 1539 |
+
"eval_tpr_at_fpr5": 0.9910536779324056,
|
| 1540 |
+
"step": 5000
|
| 1541 |
+
},
|
| 1542 |
+
{
|
| 1543 |
+
"epoch": 0.893174546747245,
|
| 1544 |
+
"grad_norm": 1.8331549167633057,
|
| 1545 |
+
"learning_rate": 2.276853252647504e-06,
|
| 1546 |
+
"loss": 0.019318313598632814,
|
| 1547 |
+
"step": 5025
|
| 1548 |
+
},
|
| 1549 |
+
{
|
| 1550 |
+
"epoch": 0.897618201208674,
|
| 1551 |
+
"grad_norm": 0.5335781574249268,
|
| 1552 |
+
"learning_rate": 2.182299546142209e-06,
|
| 1553 |
+
"loss": 0.016727542877197264,
|
| 1554 |
+
"step": 5050
|
| 1555 |
+
},
|
| 1556 |
+
{
|
| 1557 |
+
"epoch": 0.9020618556701031,
|
| 1558 |
+
"grad_norm": 2.0710813999176025,
|
| 1559 |
+
"learning_rate": 2.087745839636914e-06,
|
| 1560 |
+
"loss": 0.024016971588134765,
|
| 1561 |
+
"step": 5075
|
| 1562 |
+
},
|
| 1563 |
+
{
|
| 1564 |
+
"epoch": 0.9065055101315321,
|
| 1565 |
+
"grad_norm": 0.5005258321762085,
|
| 1566 |
+
"learning_rate": 1.993192133131619e-06,
|
| 1567 |
+
"loss": 0.023308300971984865,
|
| 1568 |
+
"step": 5100
|
| 1569 |
+
},
|
| 1570 |
+
{
|
| 1571 |
+
"epoch": 0.9109491645929613,
|
| 1572 |
+
"grad_norm": 0.8444198369979858,
|
| 1573 |
+
"learning_rate": 1.8986384266263239e-06,
|
| 1574 |
+
"loss": 0.013868091106414794,
|
| 1575 |
+
"step": 5125
|
| 1576 |
+
},
|
| 1577 |
+
{
|
| 1578 |
+
"epoch": 0.9153928190543903,
|
| 1579 |
+
"grad_norm": 1.0288333892822266,
|
| 1580 |
+
"learning_rate": 1.8040847201210288e-06,
|
| 1581 |
+
"loss": 0.02429831266403198,
|
| 1582 |
+
"step": 5150
|
| 1583 |
+
},
|
| 1584 |
+
{
|
| 1585 |
+
"epoch": 0.9198364735158194,
|
| 1586 |
+
"grad_norm": 0.5995722413063049,
|
| 1587 |
+
"learning_rate": 1.709531013615734e-06,
|
| 1588 |
+
"loss": 0.020702524185180662,
|
| 1589 |
+
"step": 5175
|
| 1590 |
+
},
|
| 1591 |
+
{
|
| 1592 |
+
"epoch": 0.9242801279772485,
|
| 1593 |
+
"grad_norm": 1.5560880899429321,
|
| 1594 |
+
"learning_rate": 1.6149773071104389e-06,
|
| 1595 |
+
"loss": 0.014829163551330566,
|
| 1596 |
+
"step": 5200
|
| 1597 |
+
},
|
| 1598 |
+
{
|
| 1599 |
+
"epoch": 0.9287237824386776,
|
| 1600 |
+
"grad_norm": 1.271360993385315,
|
| 1601 |
+
"learning_rate": 1.5204236006051438e-06,
|
| 1602 |
+
"loss": 0.020302300453186036,
|
| 1603 |
+
"step": 5225
|
| 1604 |
+
},
|
| 1605 |
+
{
|
| 1606 |
+
"epoch": 0.9331674369001066,
|
| 1607 |
+
"grad_norm": 2.036619186401367,
|
| 1608 |
+
"learning_rate": 1.425869894099849e-06,
|
| 1609 |
+
"loss": 0.023549365997314452,
|
| 1610 |
+
"step": 5250
|
| 1611 |
+
},
|
| 1612 |
+
{
|
| 1613 |
+
"epoch": 0.9376110913615358,
|
| 1614 |
+
"grad_norm": 1.7285027503967285,
|
| 1615 |
+
"learning_rate": 1.3313161875945538e-06,
|
| 1616 |
+
"loss": 0.02568220853805542,
|
| 1617 |
+
"step": 5275
|
| 1618 |
+
},
|
| 1619 |
+
{
|
| 1620 |
+
"epoch": 0.9420547458229648,
|
| 1621 |
+
"grad_norm": 1.4646673202514648,
|
| 1622 |
+
"learning_rate": 1.2367624810892588e-06,
|
| 1623 |
+
"loss": 0.030799252986907957,
|
| 1624 |
+
"step": 5300
|
| 1625 |
+
},
|
| 1626 |
+
{
|
| 1627 |
+
"epoch": 0.9464984002843939,
|
| 1628 |
+
"grad_norm": 0.6867812871932983,
|
| 1629 |
+
"learning_rate": 1.142208774583964e-06,
|
| 1630 |
+
"loss": 0.018717833757400514,
|
| 1631 |
+
"step": 5325
|
| 1632 |
+
},
|
| 1633 |
+
{
|
| 1634 |
+
"epoch": 0.9509420547458229,
|
| 1635 |
+
"grad_norm": 0.2627001702785492,
|
| 1636 |
+
"learning_rate": 1.0476550680786688e-06,
|
| 1637 |
+
"loss": 0.01653684616088867,
|
| 1638 |
+
"step": 5350
|
| 1639 |
+
},
|
| 1640 |
+
{
|
| 1641 |
+
"epoch": 0.9553857092072521,
|
| 1642 |
+
"grad_norm": 0.30518868565559387,
|
| 1643 |
+
"learning_rate": 9.531013615733737e-07,
|
| 1644 |
+
"loss": 0.022223813533782957,
|
| 1645 |
+
"step": 5375
|
| 1646 |
+
},
|
| 1647 |
+
{
|
| 1648 |
+
"epoch": 0.9598293636686811,
|
| 1649 |
+
"grad_norm": 0.5545350909233093,
|
| 1650 |
+
"learning_rate": 8.585476550680788e-07,
|
| 1651 |
+
"loss": 0.018215081691741943,
|
| 1652 |
+
"step": 5400
|
| 1653 |
+
},
|
| 1654 |
+
{
|
| 1655 |
+
"epoch": 0.9642730181301102,
|
| 1656 |
+
"grad_norm": 0.304283082485199,
|
| 1657 |
+
"learning_rate": 7.639939485627837e-07,
|
| 1658 |
+
"loss": 0.019889332056045532,
|
| 1659 |
+
"step": 5425
|
| 1660 |
+
},
|
| 1661 |
+
{
|
| 1662 |
+
"epoch": 0.9687166725915393,
|
| 1663 |
+
"grad_norm": 1.052090048789978,
|
| 1664 |
+
"learning_rate": 6.694402420574887e-07,
|
| 1665 |
+
"loss": 0.017396693229675294,
|
| 1666 |
+
"step": 5450
|
| 1667 |
+
},
|
| 1668 |
+
{
|
| 1669 |
+
"epoch": 0.9731603270529684,
|
| 1670 |
+
"grad_norm": 0.1891939640045166,
|
| 1671 |
+
"learning_rate": 5.748865355521937e-07,
|
| 1672 |
+
"loss": 0.019253385066986085,
|
| 1673 |
+
"step": 5475
|
| 1674 |
+
},
|
| 1675 |
+
{
|
| 1676 |
+
"epoch": 0.9776039815143974,
|
| 1677 |
+
"grad_norm": 0.5522451996803284,
|
| 1678 |
+
"learning_rate": 4.803328290468987e-07,
|
| 1679 |
+
"loss": 0.017252475023269653,
|
| 1680 |
+
"step": 5500
|
| 1681 |
+
},
|
| 1682 |
+
{
|
| 1683 |
+
"epoch": 0.9776039815143974,
|
| 1684 |
+
"eval_accuracy": 0.93,
|
| 1685 |
+
"eval_auroc": 0.9973359040925472,
|
| 1686 |
+
"eval_f1": 0.9349442379182157,
|
| 1687 |
+
"eval_loss": 0.024247920140624046,
|
| 1688 |
+
"eval_runtime": 38.3653,
|
| 1689 |
+
"eval_samples_per_second": 52.13,
|
| 1690 |
+
"eval_steps_per_second": 1.642,
|
| 1691 |
+
"eval_tpr_at_fpr1": 0.9055666003976143,
|
| 1692 |
+
"eval_tpr_at_fpr5": 0.9880715705765407,
|
| 1693 |
+
"step": 5500
|
| 1694 |
+
},
|
| 1695 |
+
{
|
| 1696 |
+
"epoch": 0.9820476359758266,
|
| 1697 |
+
"grad_norm": 1.228925347328186,
|
| 1698 |
+
"learning_rate": 3.8577912254160366e-07,
|
| 1699 |
+
"loss": 0.022175378799438476,
|
| 1700 |
+
"step": 5525
|
| 1701 |
+
},
|
| 1702 |
+
{
|
| 1703 |
+
"epoch": 0.9864912904372556,
|
| 1704 |
+
"grad_norm": 0.3541058599948883,
|
| 1705 |
+
"learning_rate": 2.9122541603630864e-07,
|
| 1706 |
+
"loss": 0.019693093299865724,
|
| 1707 |
+
"step": 5550
|
| 1708 |
+
},
|
| 1709 |
+
{
|
| 1710 |
+
"epoch": 0.9909349448986847,
|
| 1711 |
+
"grad_norm": 0.0981239303946495,
|
| 1712 |
+
"learning_rate": 1.9667170953101364e-07,
|
| 1713 |
+
"loss": 0.018846184015274048,
|
| 1714 |
+
"step": 5575
|
| 1715 |
+
},
|
| 1716 |
+
{
|
| 1717 |
+
"epoch": 0.9953785993601137,
|
| 1718 |
+
"grad_norm": 0.9296360611915588,
|
| 1719 |
+
"learning_rate": 1.021180030257186e-07,
|
| 1720 |
+
"loss": 0.015072580575942993,
|
| 1721 |
+
"step": 5600
|
| 1722 |
+
},
|
| 1723 |
+
{
|
| 1724 |
+
"epoch": 0.9998222538215429,
|
| 1725 |
+
"grad_norm": 1.3698371648788452,
|
| 1726 |
+
"learning_rate": 7.564296520423602e-09,
|
| 1727 |
+
"loss": 0.022552621364593507,
|
| 1728 |
+
"step": 5625
|
| 1729 |
+
},
|
| 1730 |
+
{
|
| 1731 |
+
"epoch": 1.0,
|
| 1732 |
+
"eval_accuracy": 0.9275,
|
| 1733 |
+
"eval_auroc": 0.9973979063246277,
|
| 1734 |
+
"eval_f1": 0.9327770050996754,
|
| 1735 |
+
"eval_loss": 0.02445497363805771,
|
| 1736 |
+
"eval_runtime": 39.1622,
|
| 1737 |
+
"eval_samples_per_second": 51.07,
|
| 1738 |
+
"eval_steps_per_second": 1.609,
|
| 1739 |
+
"eval_tpr_at_fpr1": 0.911530815109344,
|
| 1740 |
+
"eval_tpr_at_fpr5": 0.989065606361829,
|
| 1741 |
+
"step": 5626
|
| 1742 |
+
}
|
| 1743 |
+
],
|
| 1744 |
+
"logging_steps": 25,
|
| 1745 |
+
"max_steps": 5626,
|
| 1746 |
+
"num_input_tokens_seen": 0,
|
| 1747 |
+
"num_train_epochs": 1,
|
| 1748 |
+
"save_steps": 500,
|
| 1749 |
+
"stateful_callbacks": {
|
| 1750 |
+
"EarlyStoppingCallback": {
|
| 1751 |
+
"args": {
|
| 1752 |
+
"early_stopping_patience": 3,
|
| 1753 |
+
"early_stopping_threshold": 0.0
|
| 1754 |
+
},
|
| 1755 |
+
"attributes": {
|
| 1756 |
+
"early_stopping_patience_counter": 0
|
| 1757 |
+
}
|
| 1758 |
+
},
|
| 1759 |
+
"TrainerControl": {
|
| 1760 |
+
"args": {
|
| 1761 |
+
"should_epoch_stop": false,
|
| 1762 |
+
"should_evaluate": false,
|
| 1763 |
+
"should_log": false,
|
| 1764 |
+
"should_save": true,
|
| 1765 |
+
"should_training_stop": true
|
| 1766 |
+
},
|
| 1767 |
+
"attributes": {}
|
| 1768 |
+
}
|
| 1769 |
+
},
|
| 1770 |
+
"total_flos": 0.0,
|
| 1771 |
+
"train_batch_size": 32,
|
| 1772 |
+
"trial_name": null,
|
| 1773 |
+
"trial_params": null
|
| 1774 |
+
}
|
checkpoint-5626/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b726f050f5029e1ef25800ffb43c1e5bcf5df8fde670427401e6bad8b3522c9c
|
| 3 |
+
size 5329
|
meta.json
CHANGED
|
@@ -36,6 +36,6 @@
|
|
| 36 |
"wiki": 13
|
| 37 |
},
|
| 38 |
"num_domains": 14,
|
| 39 |
-
"n_train":
|
| 40 |
-
"n_val":
|
| 41 |
}
|
|
|
|
| 36 |
"wiki": 13
|
| 37 |
},
|
| 38 |
"num_domains": 14,
|
| 39 |
+
"n_train": 180001,
|
| 40 |
+
"n_val": 19999
|
| 41 |
}
|
onnx/detector_config.json
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"base_encoder": "microsoft/deberta-v3-base",
|
| 3 |
+
"feature_dim": 17,
|
| 4 |
+
"feat_hidden": 64,
|
| 5 |
+
"num_domains": 14,
|
| 6 |
+
"domain_loss_weight": 0.2,
|
| 7 |
+
"dropout": 0.1,
|
| 8 |
+
"focal_gamma": 2.0,
|
| 9 |
+
"focal_alpha": 0.85
|
| 10 |
+
}
|
onnx/meta.json
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"feature_names": [
|
| 3 |
+
"flesch_reading_ease",
|
| 4 |
+
"flesch_kincaid_grade",
|
| 5 |
+
"gunning_fog",
|
| 6 |
+
"type_token_ratio",
|
| 7 |
+
"hapax_rate",
|
| 8 |
+
"avg_word_len",
|
| 9 |
+
"unique_word_frac",
|
| 10 |
+
"sentence_count_log",
|
| 11 |
+
"mean_sentence_len",
|
| 12 |
+
"sentence_len_std",
|
| 13 |
+
"sentence_len_cv",
|
| 14 |
+
"commas_per_sentence",
|
| 15 |
+
"punct_ratio",
|
| 16 |
+
"stopword_ratio",
|
| 17 |
+
"digit_ratio",
|
| 18 |
+
"upper_ratio",
|
| 19 |
+
"word_count_log"
|
| 20 |
+
],
|
| 21 |
+
"feature_dim": 17,
|
| 22 |
+
"domain2id": {
|
| 23 |
+
"finance": 0,
|
| 24 |
+
"medicine": 1,
|
| 25 |
+
"mixed": 2,
|
| 26 |
+
"open_qa": 3,
|
| 27 |
+
"raid_abstracts": 4,
|
| 28 |
+
"raid_books": 5,
|
| 29 |
+
"raid_news": 6,
|
| 30 |
+
"raid_poetry": 7,
|
| 31 |
+
"raid_recipes": 8,
|
| 32 |
+
"raid_reddit": 9,
|
| 33 |
+
"raid_reviews": 10,
|
| 34 |
+
"raid_wiki": 11,
|
| 35 |
+
"reddit": 12,
|
| 36 |
+
"wiki": 13
|
| 37 |
+
},
|
| 38 |
+
"num_domains": 14,
|
| 39 |
+
"n_train": 180001,
|
| 40 |
+
"n_val": 19999
|
| 41 |
+
}
|
onnx/model_fp16.onnx
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 369348178
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:af06fe93c34ea853ea573d1daa93683dfcb4b7ef80299f85a6b15ea23c64fcb0
|
| 3 |
size 369348178
|
onnx/model_fp32.onnx
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 737710212
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5562b7713735b9141b8005c39460e866a5462c9bf409993dc54d70bab6143c45
|
| 3 |
size 737710212
|
onnx/model_int8.onnx
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 243819134
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6dc3275bc3c4e0d89bc3cd1e9aacca98ee054e39e1e5b6099c7058ec06e3b1d6
|
| 3 |
size 243819134
|
onnx/model_q4.onnx
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 177709957
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c03511115935e5d337be758bfb127e316cf3e214c31f40eb3decd6dff424c58f
|
| 3 |
size 177709957
|
onnx/raid_results.json
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"overall": {
|
| 3 |
+
"n": 20000,
|
| 4 |
+
"accuracy": 0.98195,
|
| 5 |
+
"tpr_at_fpr5": 0.9146611698015975,
|
| 6 |
+
"tpr_at_fpr1": 0.7988662715794899,
|
| 7 |
+
"auroc": 0.9821251128640067
|
| 8 |
+
},
|
| 9 |
+
"by_attack": {
|
| 10 |
+
"alternative_spelling": {
|
| 11 |
+
"n": 2225,
|
| 12 |
+
"accuracy": 0.8728089887640449,
|
| 13 |
+
"tpr_at_fpr5": 0.9177914110429448,
|
| 14 |
+
"tpr_at_fpr1": 0.8165644171779141,
|
| 15 |
+
"auroc": 0.9824694540392843
|
| 16 |
+
},
|
| 17 |
+
"article_deletion": {
|
| 18 |
+
"n": 2231,
|
| 19 |
+
"accuracy": 0.8731510533393098,
|
| 20 |
+
"tpr_at_fpr5": 0.8905867970660146,
|
| 21 |
+
"tpr_at_fpr1": 0.7854523227383863,
|
| 22 |
+
"auroc": 0.9769421215919131
|
| 23 |
+
},
|
| 24 |
+
"homoglyph": {
|
| 25 |
+
"n": 2170,
|
| 26 |
+
"accuracy": 0.871889400921659,
|
| 27 |
+
"tpr_at_fpr5": 0.9326984126984127,
|
| 28 |
+
"tpr_at_fpr1": 0.8488888888888889,
|
| 29 |
+
"auroc": 0.9868662131519276
|
| 30 |
+
},
|
| 31 |
+
"insert_paragraphs": {
|
| 32 |
+
"n": 2215,
|
| 33 |
+
"accuracy": 0.8753950338600451,
|
| 34 |
+
"tpr_at_fpr5": 0.9302469135802469,
|
| 35 |
+
"tpr_at_fpr1": 0.8351851851851851,
|
| 36 |
+
"auroc": 0.9850887021475256
|
| 37 |
+
},
|
| 38 |
+
"none": {
|
| 39 |
+
"n": 2149,
|
| 40 |
+
"accuracy": 0.8711028385295486,
|
| 41 |
+
"tpr_at_fpr5": 0.9298584298584298,
|
| 42 |
+
"tpr_at_fpr1": 0.8532818532818532,
|
| 43 |
+
"auroc": 0.9860971415593265
|
| 44 |
+
},
|
| 45 |
+
"number": {
|
| 46 |
+
"n": 2211,
|
| 47 |
+
"accuracy": 0.8756218905472637,
|
| 48 |
+
"tpr_at_fpr5": 0.9282178217821783,
|
| 49 |
+
"tpr_at_fpr1": 0.8168316831683168,
|
| 50 |
+
"auroc": 0.9850663532739828
|
| 51 |
+
},
|
| 52 |
+
"paraphrase": {
|
| 53 |
+
"n": 2259,
|
| 54 |
+
"accuracy": 0.8764940239043825,
|
| 55 |
+
"tpr_at_fpr5": 0.8641826923076923,
|
| 56 |
+
"tpr_at_fpr1": 0.6219951923076923,
|
| 57 |
+
"auroc": 0.9738819085326438
|
| 58 |
+
},
|
| 59 |
+
"perplexity_misspelling": {
|
| 60 |
+
"n": 2259,
|
| 61 |
+
"accuracy": 0.8747233289065959,
|
| 62 |
+
"tpr_at_fpr5": 0.8990384615384616,
|
| 63 |
+
"tpr_at_fpr1": 0.7884615384615384,
|
| 64 |
+
"auroc": 0.9792622818358111
|
| 65 |
+
},
|
| 66 |
+
"synonym": {
|
| 67 |
+
"n": 2211,
|
| 68 |
+
"accuracy": 0.8715513342379014,
|
| 69 |
+
"tpr_at_fpr5": 0.9102722772277227,
|
| 70 |
+
"tpr_at_fpr1": 0.7623762376237624,
|
| 71 |
+
"auroc": 0.9792006406523005
|
| 72 |
+
},
|
| 73 |
+
"upper_lower": {
|
| 74 |
+
"n": 2197,
|
| 75 |
+
"accuracy": 0.8725534820209376,
|
| 76 |
+
"tpr_at_fpr5": 0.9257178526841449,
|
| 77 |
+
"tpr_at_fpr1": 0.8033707865168539,
|
| 78 |
+
"auroc": 0.9831041030644467
|
| 79 |
+
},
|
| 80 |
+
"whitespace": {
|
| 81 |
+
"n": 2208,
|
| 82 |
+
"accuracy": 0.8745471014492754,
|
| 83 |
+
"tpr_at_fpr5": 0.921264724116553,
|
| 84 |
+
"tpr_at_fpr1": 0.8239305641661501,
|
| 85 |
+
"auroc": 0.9838674217362083
|
| 86 |
+
},
|
| 87 |
+
"zero_width_space": {
|
| 88 |
+
"n": 2210,
|
| 89 |
+
"accuracy": 0.8742081447963801,
|
| 90 |
+
"tpr_at_fpr5": 0.9294117647058824,
|
| 91 |
+
"tpr_at_fpr1": 0.8390092879256966,
|
| 92 |
+
"auroc": 0.9843244790176133
|
| 93 |
+
}
|
| 94 |
+
},
|
| 95 |
+
"by_domain": {
|
| 96 |
+
"abstracts": {
|
| 97 |
+
"n": 3178,
|
| 98 |
+
"accuracy": 0.9128382630585273,
|
| 99 |
+
"tpr_at_fpr5": 0.9043747580332946,
|
| 100 |
+
"tpr_at_fpr1": 0.7557104142469996,
|
| 101 |
+
"auroc": 0.9824040185179763
|
| 102 |
+
},
|
| 103 |
+
"books": {
|
| 104 |
+
"n": 3235,
|
| 105 |
+
"accuracy": 0.9153013910355486,
|
| 106 |
+
"tpr_at_fpr5": 0.9704545454545455,
|
| 107 |
+
"tpr_at_fpr1": 0.9053030303030303,
|
| 108 |
+
"auroc": 0.9939209320091673
|
| 109 |
+
},
|
| 110 |
+
"news": {
|
| 111 |
+
"n": 3226,
|
| 112 |
+
"accuracy": 0.912275263484191,
|
| 113 |
+
"tpr_at_fpr5": 0.8749524895477004,
|
| 114 |
+
"tpr_at_fpr1": 0.7229190421892816,
|
| 115 |
+
"auroc": 0.9752428223284753
|
| 116 |
+
},
|
| 117 |
+
"poetry": {
|
| 118 |
+
"n": 3136,
|
| 119 |
+
"accuracy": 0.9075255102040817,
|
| 120 |
+
"tpr_at_fpr5": 0.885478158205431,
|
| 121 |
+
"tpr_at_fpr1": 0.7276662731208186,
|
| 122 |
+
"auroc": 0.9738301932343185
|
| 123 |
+
},
|
| 124 |
+
"recipes": {
|
| 125 |
+
"n": 3148,
|
| 126 |
+
"accuracy": 0.9113722998729352,
|
| 127 |
+
"tpr_at_fpr5": 0.9498629063846455,
|
| 128 |
+
"tpr_at_fpr1": 0.8527222875048962,
|
| 129 |
+
"auroc": 0.989551919475193
|
| 130 |
+
},
|
| 131 |
+
"reddit": {
|
| 132 |
+
"n": 3179,
|
| 133 |
+
"accuracy": 0.9053161371500472,
|
| 134 |
+
"tpr_at_fpr5": 0.9036377708978328,
|
| 135 |
+
"tpr_at_fpr1": 0.81656346749226,
|
| 136 |
+
"auroc": 0.9770247417852589
|
| 137 |
+
},
|
| 138 |
+
"reviews": {
|
| 139 |
+
"n": 1933,
|
| 140 |
+
"accuracy": 0.8572167615106053,
|
| 141 |
+
"tpr_at_fpr5": 0.9200298953662183,
|
| 142 |
+
"tpr_at_fpr1": 0.8243647234678625,
|
| 143 |
+
"auroc": 0.9841969074625365
|
| 144 |
+
},
|
| 145 |
+
"wiki": {
|
| 146 |
+
"n": 3130,
|
| 147 |
+
"accuracy": 0.9089456869009584,
|
| 148 |
+
"tpr_at_fpr5": 0.9104536489151874,
|
| 149 |
+
"tpr_at_fpr1": 0.7964497041420119,
|
| 150 |
+
"auroc": 0.9816398985629755
|
| 151 |
+
}
|
| 152 |
+
}
|
| 153 |
+
}
|
onnx/raid_submission.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6565545394e9e633e77dc60ff996c310401159ebef576b4805aec322823a6c0c
|
| 3 |
+
size 736857075
|
raid_results.json
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"overall": {
|
| 3 |
+
"n": 20000,
|
| 4 |
+
"accuracy": 0.98195,
|
| 5 |
+
"tpr_at_fpr5": 0.9146611698015975,
|
| 6 |
+
"tpr_at_fpr1": 0.7988662715794899,
|
| 7 |
+
"auroc": 0.9821251128640067
|
| 8 |
+
},
|
| 9 |
+
"by_attack": {
|
| 10 |
+
"alternative_spelling": {
|
| 11 |
+
"n": 2225,
|
| 12 |
+
"accuracy": 0.8728089887640449,
|
| 13 |
+
"tpr_at_fpr5": 0.9177914110429448,
|
| 14 |
+
"tpr_at_fpr1": 0.8165644171779141,
|
| 15 |
+
"auroc": 0.9824694540392843
|
| 16 |
+
},
|
| 17 |
+
"article_deletion": {
|
| 18 |
+
"n": 2231,
|
| 19 |
+
"accuracy": 0.8731510533393098,
|
| 20 |
+
"tpr_at_fpr5": 0.8905867970660146,
|
| 21 |
+
"tpr_at_fpr1": 0.7854523227383863,
|
| 22 |
+
"auroc": 0.9769421215919131
|
| 23 |
+
},
|
| 24 |
+
"homoglyph": {
|
| 25 |
+
"n": 2170,
|
| 26 |
+
"accuracy": 0.871889400921659,
|
| 27 |
+
"tpr_at_fpr5": 0.9326984126984127,
|
| 28 |
+
"tpr_at_fpr1": 0.8488888888888889,
|
| 29 |
+
"auroc": 0.9868662131519276
|
| 30 |
+
},
|
| 31 |
+
"insert_paragraphs": {
|
| 32 |
+
"n": 2215,
|
| 33 |
+
"accuracy": 0.8753950338600451,
|
| 34 |
+
"tpr_at_fpr5": 0.9302469135802469,
|
| 35 |
+
"tpr_at_fpr1": 0.8351851851851851,
|
| 36 |
+
"auroc": 0.9850887021475256
|
| 37 |
+
},
|
| 38 |
+
"none": {
|
| 39 |
+
"n": 2149,
|
| 40 |
+
"accuracy": 0.8711028385295486,
|
| 41 |
+
"tpr_at_fpr5": 0.9298584298584298,
|
| 42 |
+
"tpr_at_fpr1": 0.8532818532818532,
|
| 43 |
+
"auroc": 0.9860971415593265
|
| 44 |
+
},
|
| 45 |
+
"number": {
|
| 46 |
+
"n": 2211,
|
| 47 |
+
"accuracy": 0.8756218905472637,
|
| 48 |
+
"tpr_at_fpr5": 0.9282178217821783,
|
| 49 |
+
"tpr_at_fpr1": 0.8168316831683168,
|
| 50 |
+
"auroc": 0.9850663532739828
|
| 51 |
+
},
|
| 52 |
+
"paraphrase": {
|
| 53 |
+
"n": 2259,
|
| 54 |
+
"accuracy": 0.8764940239043825,
|
| 55 |
+
"tpr_at_fpr5": 0.8641826923076923,
|
| 56 |
+
"tpr_at_fpr1": 0.6219951923076923,
|
| 57 |
+
"auroc": 0.9738819085326438
|
| 58 |
+
},
|
| 59 |
+
"perplexity_misspelling": {
|
| 60 |
+
"n": 2259,
|
| 61 |
+
"accuracy": 0.8747233289065959,
|
| 62 |
+
"tpr_at_fpr5": 0.8990384615384616,
|
| 63 |
+
"tpr_at_fpr1": 0.7884615384615384,
|
| 64 |
+
"auroc": 0.9792622818358111
|
| 65 |
+
},
|
| 66 |
+
"synonym": {
|
| 67 |
+
"n": 2211,
|
| 68 |
+
"accuracy": 0.8715513342379014,
|
| 69 |
+
"tpr_at_fpr5": 0.9102722772277227,
|
| 70 |
+
"tpr_at_fpr1": 0.7623762376237624,
|
| 71 |
+
"auroc": 0.9792006406523005
|
| 72 |
+
},
|
| 73 |
+
"upper_lower": {
|
| 74 |
+
"n": 2197,
|
| 75 |
+
"accuracy": 0.8725534820209376,
|
| 76 |
+
"tpr_at_fpr5": 0.9257178526841449,
|
| 77 |
+
"tpr_at_fpr1": 0.8033707865168539,
|
| 78 |
+
"auroc": 0.9831041030644467
|
| 79 |
+
},
|
| 80 |
+
"whitespace": {
|
| 81 |
+
"n": 2208,
|
| 82 |
+
"accuracy": 0.8745471014492754,
|
| 83 |
+
"tpr_at_fpr5": 0.921264724116553,
|
| 84 |
+
"tpr_at_fpr1": 0.8239305641661501,
|
| 85 |
+
"auroc": 0.9838674217362083
|
| 86 |
+
},
|
| 87 |
+
"zero_width_space": {
|
| 88 |
+
"n": 2210,
|
| 89 |
+
"accuracy": 0.8742081447963801,
|
| 90 |
+
"tpr_at_fpr5": 0.9294117647058824,
|
| 91 |
+
"tpr_at_fpr1": 0.8390092879256966,
|
| 92 |
+
"auroc": 0.9843244790176133
|
| 93 |
+
}
|
| 94 |
+
},
|
| 95 |
+
"by_domain": {
|
| 96 |
+
"abstracts": {
|
| 97 |
+
"n": 3178,
|
| 98 |
+
"accuracy": 0.9128382630585273,
|
| 99 |
+
"tpr_at_fpr5": 0.9043747580332946,
|
| 100 |
+
"tpr_at_fpr1": 0.7557104142469996,
|
| 101 |
+
"auroc": 0.9824040185179763
|
| 102 |
+
},
|
| 103 |
+
"books": {
|
| 104 |
+
"n": 3235,
|
| 105 |
+
"accuracy": 0.9153013910355486,
|
| 106 |
+
"tpr_at_fpr5": 0.9704545454545455,
|
| 107 |
+
"tpr_at_fpr1": 0.9053030303030303,
|
| 108 |
+
"auroc": 0.9939209320091673
|
| 109 |
+
},
|
| 110 |
+
"news": {
|
| 111 |
+
"n": 3226,
|
| 112 |
+
"accuracy": 0.912275263484191,
|
| 113 |
+
"tpr_at_fpr5": 0.8749524895477004,
|
| 114 |
+
"tpr_at_fpr1": 0.7229190421892816,
|
| 115 |
+
"auroc": 0.9752428223284753
|
| 116 |
+
},
|
| 117 |
+
"poetry": {
|
| 118 |
+
"n": 3136,
|
| 119 |
+
"accuracy": 0.9075255102040817,
|
| 120 |
+
"tpr_at_fpr5": 0.885478158205431,
|
| 121 |
+
"tpr_at_fpr1": 0.7276662731208186,
|
| 122 |
+
"auroc": 0.9738301932343185
|
| 123 |
+
},
|
| 124 |
+
"recipes": {
|
| 125 |
+
"n": 3148,
|
| 126 |
+
"accuracy": 0.9113722998729352,
|
| 127 |
+
"tpr_at_fpr5": 0.9498629063846455,
|
| 128 |
+
"tpr_at_fpr1": 0.8527222875048962,
|
| 129 |
+
"auroc": 0.989551919475193
|
| 130 |
+
},
|
| 131 |
+
"reddit": {
|
| 132 |
+
"n": 3179,
|
| 133 |
+
"accuracy": 0.9053161371500472,
|
| 134 |
+
"tpr_at_fpr5": 0.9036377708978328,
|
| 135 |
+
"tpr_at_fpr1": 0.81656346749226,
|
| 136 |
+
"auroc": 0.9770247417852589
|
| 137 |
+
},
|
| 138 |
+
"reviews": {
|
| 139 |
+
"n": 1933,
|
| 140 |
+
"accuracy": 0.8572167615106053,
|
| 141 |
+
"tpr_at_fpr5": 0.9200298953662183,
|
| 142 |
+
"tpr_at_fpr1": 0.8243647234678625,
|
| 143 |
+
"auroc": 0.9841969074625365
|
| 144 |
+
},
|
| 145 |
+
"wiki": {
|
| 146 |
+
"n": 3130,
|
| 147 |
+
"accuracy": 0.9089456869009584,
|
| 148 |
+
"tpr_at_fpr5": 0.9104536489151874,
|
| 149 |
+
"tpr_at_fpr1": 0.7964497041420119,
|
| 150 |
+
"auroc": 0.9816398985629755
|
| 151 |
+
}
|
| 152 |
+
}
|
| 153 |
+
}
|