Training in progress, epoch 5, checkpoint
Browse files
checkpoint-17925/README.md
CHANGED
|
@@ -366,22 +366,7 @@ You can finetune this model on your own dataset.
|
|
| 366 |
### Training Logs
|
| 367 |
| Epoch | Step | Training Loss | Validation Loss | cosine_accuracy |
|
| 368 |
|:------:|:-----:|:-------------:|:---------------:|:---------------:|
|
| 369 |
-
|
|
| 370 |
-
| 0.2789 | 1000 | 1.6731 | 1.2452 | 0.9435 |
|
| 371 |
-
| 0.5579 | 2000 | 1.4545 | 1.1861 | 0.9491 |
|
| 372 |
-
| 0.8368 | 3000 | 1.3514 | 1.1498 | 0.9530 |
|
| 373 |
-
| 1.1158 | 4000 | 1.2499 | 1.1096 | 0.9535 |
|
| 374 |
-
| 1.3947 | 5000 | 1.1977 | 1.0828 | 0.9589 |
|
| 375 |
-
| 1.6736 | 6000 | 1.1468 | 1.0728 | 0.9585 |
|
| 376 |
-
| 1.9526 | 7000 | 1.1011 | 1.0473 | 0.9593 |
|
| 377 |
-
| 2.2315 | 8000 | 1.0401 | 1.0429 | 0.9575 |
|
| 378 |
-
| 2.5105 | 9000 | 1.0196 | 1.0377 | 0.9599 |
|
| 379 |
-
| 2.7894 | 10000 | 0.9939 | 1.0395 | 0.9592 |
|
| 380 |
-
| 3.0683 | 11000 | 0.9579 | 1.0249 | 0.9612 |
|
| 381 |
-
| 3.3473 | 12000 | 0.9437 | 1.0262 | 0.9600 |
|
| 382 |
-
| 3.6262 | 13000 | 0.9279 | 1.0129 | 0.9618 |
|
| 383 |
-
| 3.9052 | 14000 | 0.9184 | 1.0108 | 0.9621 |
|
| 384 |
-
| 4.1841 | 15000 | 0.8807 | 1.0071 | 0.9614 |
|
| 385 |
| 4.4630 | 16000 | 0.8802 | 1.0004 | 0.9613 |
|
| 386 |
| 4.7420 | 17000 | 0.8752 | 1.0061 | 0.9617 |
|
| 387 |
|
|
|
|
| 366 |
### Training Logs
|
| 367 |
| Epoch | Step | Training Loss | Validation Loss | cosine_accuracy |
|
| 368 |
|:------:|:-----:|:-------------:|:---------------:|:---------------:|
|
| 369 |
+
| 4.1841 | 15000 | 0.8884 | 1.0071 | 0.9614 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 370 |
| 4.4630 | 16000 | 0.8802 | 1.0004 | 0.9613 |
|
| 371 |
| 4.7420 | 17000 | 0.8752 | 1.0061 | 0.9617 |
|
| 372 |
|
checkpoint-17925/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e41377ba25d8813928ae800791180b298f08b3f137afff07123ecd1fe1b0657c
|
| 3 |
size 14244
|
checkpoint-17925/trainer_state.json
CHANGED
|
@@ -244,16 +244,16 @@
|
|
| 244 |
"epoch": 4.184100418410042,
|
| 245 |
"grad_norm": 4.519611358642578,
|
| 246 |
"learning_rate": 5.964002440512509e-06,
|
| 247 |
-
"loss": 0.
|
| 248 |
"step": 15000
|
| 249 |
},
|
| 250 |
{
|
| 251 |
"epoch": 4.184100418410042,
|
| 252 |
"eval_cosine_accuracy": 0.9614049792289734,
|
| 253 |
"eval_loss": 1.0071080923080444,
|
| 254 |
-
"eval_runtime":
|
| 255 |
-
"eval_samples_per_second":
|
| 256 |
-
"eval_steps_per_second": 3.
|
| 257 |
"step": 15000
|
| 258 |
},
|
| 259 |
{
|
|
@@ -267,9 +267,9 @@
|
|
| 267 |
"epoch": 4.463040446304045,
|
| 268 |
"eval_cosine_accuracy": 0.9612998366355896,
|
| 269 |
"eval_loss": 1.0004464387893677,
|
| 270 |
-
"eval_runtime":
|
| 271 |
-
"eval_samples_per_second":
|
| 272 |
-
"eval_steps_per_second": 3.
|
| 273 |
"step": 16000
|
| 274 |
},
|
| 275 |
{
|
|
@@ -283,9 +283,9 @@
|
|
| 283 |
"epoch": 4.741980474198048,
|
| 284 |
"eval_cosine_accuracy": 0.9617204666137695,
|
| 285 |
"eval_loss": 1.0060843229293823,
|
| 286 |
-
"eval_runtime":
|
| 287 |
-
"eval_samples_per_second":
|
| 288 |
-
"eval_steps_per_second": 3.
|
| 289 |
"step": 17000
|
| 290 |
}
|
| 291 |
],
|
|
|
|
| 244 |
"epoch": 4.184100418410042,
|
| 245 |
"grad_norm": 4.519611358642578,
|
| 246 |
"learning_rate": 5.964002440512509e-06,
|
| 247 |
+
"loss": 0.8884,
|
| 248 |
"step": 15000
|
| 249 |
},
|
| 250 |
{
|
| 251 |
"epoch": 4.184100418410042,
|
| 252 |
"eval_cosine_accuracy": 0.9614049792289734,
|
| 253 |
"eval_loss": 1.0071080923080444,
|
| 254 |
+
"eval_runtime": 21.2593,
|
| 255 |
+
"eval_samples_per_second": 447.287,
|
| 256 |
+
"eval_steps_per_second": 3.528,
|
| 257 |
"step": 15000
|
| 258 |
},
|
| 259 |
{
|
|
|
|
| 267 |
"epoch": 4.463040446304045,
|
| 268 |
"eval_cosine_accuracy": 0.9612998366355896,
|
| 269 |
"eval_loss": 1.0004464387893677,
|
| 270 |
+
"eval_runtime": 21.271,
|
| 271 |
+
"eval_samples_per_second": 447.04,
|
| 272 |
+
"eval_steps_per_second": 3.526,
|
| 273 |
"step": 16000
|
| 274 |
},
|
| 275 |
{
|
|
|
|
| 283 |
"epoch": 4.741980474198048,
|
| 284 |
"eval_cosine_accuracy": 0.9617204666137695,
|
| 285 |
"eval_loss": 1.0060843229293823,
|
| 286 |
+
"eval_runtime": 21.4196,
|
| 287 |
+
"eval_samples_per_second": 443.939,
|
| 288 |
+
"eval_steps_per_second": 3.501,
|
| 289 |
"step": 17000
|
| 290 |
}
|
| 291 |
],
|
checkpoint-17925/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5752
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:93b1607a15af70cf6b32a232aa87f4ee1d3038e5a70922f63dc141b69792d409
|
| 3 |
size 5752
|