Training in progress, step 23600, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
|
@@ -1491,6 +1491,14 @@ You can finetune this model on your own dataset.
|
|
| 1491 |
| 0.4082 | 23100 | 0.3474 |
|
| 1492 |
| 0.4091 | 23150 | 0.3208 |
|
| 1493 |
| 0.4100 | 23200 | 0.3798 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1494 |
|
| 1495 |
</details>
|
| 1496 |
|
|
|
|
| 1491 |
| 0.4082 | 23100 | 0.3474 |
|
| 1492 |
| 0.4091 | 23150 | 0.3208 |
|
| 1493 |
| 0.4100 | 23200 | 0.3798 |
|
| 1494 |
+
| 0.4108 | 23250 | 0.3282 |
|
| 1495 |
+
| 0.4117 | 23300 | 0.3302 |
|
| 1496 |
+
| 0.4126 | 23350 | 0.3599 |
|
| 1497 |
+
| 0.4135 | 23400 | 0.3608 |
|
| 1498 |
+
| 0.4144 | 23450 | 0.3387 |
|
| 1499 |
+
| 0.4153 | 23500 | 0.3987 |
|
| 1500 |
+
| 0.4161 | 23550 | 0.3387 |
|
| 1501 |
+
| 0.4170 | 23600 | 0.2989 |
|
| 1502 |
|
| 1503 |
</details>
|
| 1504 |
|
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 90864192
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1617eb2ae4888507c4f4075423705e736487e0fd06011313c271b8a67d2121e7
|
| 3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 180609210
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8082e636e89c0305931d4fed9e511d53d0c861249cb9eb1baa51ec94b573d123
|
| 3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c7c3077f3b0e21db426cf04aaf6706b3f8e724b43a1c804482891604f1539c3f
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b2a1548d715b309492a66002f720121ae6b58979a558a4ea26d5d559620bd59b
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6c549b0e10abd21bebaa5ec4fd4b6a6e95036a423d8901ec4f127ce499a3bb98
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -3256,6 +3256,62 @@
|
|
| 3256 |
"learning_rate": 3.278847852977558e-05,
|
| 3257 |
"loss": 0.3798,
|
| 3258 |
"step": 23200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3259 |
}
|
| 3260 |
],
|
| 3261 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.4170274425261967,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 23600,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 3256 |
"learning_rate": 3.278847852977558e-05,
|
| 3257 |
"loss": 0.3798,
|
| 3258 |
"step": 23200
|
| 3259 |
+
},
|
| 3260 |
+
{
|
| 3261 |
+
"epoch": 0.41084271350568113,
|
| 3262 |
+
"grad_norm": 1.8930203914642334,
|
| 3263 |
+
"learning_rate": 3.273939251143704e-05,
|
| 3264 |
+
"loss": 0.3282,
|
| 3265 |
+
"step": 23250
|
| 3266 |
+
},
|
| 3267 |
+
{
|
| 3268 |
+
"epoch": 0.4117262462228976,
|
| 3269 |
+
"grad_norm": 1.256135106086731,
|
| 3270 |
+
"learning_rate": 3.2690306493098507e-05,
|
| 3271 |
+
"loss": 0.3302,
|
| 3272 |
+
"step": 23300
|
| 3273 |
+
},
|
| 3274 |
+
{
|
| 3275 |
+
"epoch": 0.41260977894011414,
|
| 3276 |
+
"grad_norm": 1.952988862991333,
|
| 3277 |
+
"learning_rate": 3.264122047475997e-05,
|
| 3278 |
+
"loss": 0.3599,
|
| 3279 |
+
"step": 23350
|
| 3280 |
+
},
|
| 3281 |
+
{
|
| 3282 |
+
"epoch": 0.41349331165733066,
|
| 3283 |
+
"grad_norm": 1.3686082363128662,
|
| 3284 |
+
"learning_rate": 3.2592134456421436e-05,
|
| 3285 |
+
"loss": 0.3608,
|
| 3286 |
+
"step": 23400
|
| 3287 |
+
},
|
| 3288 |
+
{
|
| 3289 |
+
"epoch": 0.4143768443745472,
|
| 3290 |
+
"grad_norm": 1.56107759475708,
|
| 3291 |
+
"learning_rate": 3.2543048438082894e-05,
|
| 3292 |
+
"loss": 0.3387,
|
| 3293 |
+
"step": 23450
|
| 3294 |
+
},
|
| 3295 |
+
{
|
| 3296 |
+
"epoch": 0.4152603770917637,
|
| 3297 |
+
"grad_norm": 1.823240876197815,
|
| 3298 |
+
"learning_rate": 3.249396241974436e-05,
|
| 3299 |
+
"loss": 0.3987,
|
| 3300 |
+
"step": 23500
|
| 3301 |
+
},
|
| 3302 |
+
{
|
| 3303 |
+
"epoch": 0.41614390980898025,
|
| 3304 |
+
"grad_norm": 1.2912514209747314,
|
| 3305 |
+
"learning_rate": 3.244487640140583e-05,
|
| 3306 |
+
"loss": 0.3387,
|
| 3307 |
+
"step": 23550
|
| 3308 |
+
},
|
| 3309 |
+
{
|
| 3310 |
+
"epoch": 0.4170274425261967,
|
| 3311 |
+
"grad_norm": 1.5520604848861694,
|
| 3312 |
+
"learning_rate": 3.239579038306729e-05,
|
| 3313 |
+
"loss": 0.2989,
|
| 3314 |
+
"step": 23600
|
| 3315 |
}
|
| 3316 |
],
|
| 3317 |
"logging_steps": 50,
|