Training in progress, epoch 6, checkpoint
Browse files
checkpoint-21510/README.md
CHANGED
|
@@ -366,22 +366,7 @@ You can finetune this model on your own dataset.
|
|
| 366 |
### Training Logs
|
| 367 |
| Epoch | Step | Training Loss | Validation Loss | cosine_accuracy |
|
| 368 |
|:------:|:-----:|:-------------:|:---------------:|:---------------:|
|
| 369 |
-
|
|
| 370 |
-
| 0.2789 | 1000 | 1.6731 | 1.2452 | 0.9435 |
|
| 371 |
-
| 0.5579 | 2000 | 1.4545 | 1.1861 | 0.9491 |
|
| 372 |
-
| 0.8368 | 3000 | 1.3514 | 1.1498 | 0.9530 |
|
| 373 |
-
| 1.1158 | 4000 | 1.2499 | 1.1096 | 0.9535 |
|
| 374 |
-
| 1.3947 | 5000 | 1.1977 | 1.0828 | 0.9589 |
|
| 375 |
-
| 1.6736 | 6000 | 1.1468 | 1.0728 | 0.9585 |
|
| 376 |
-
| 1.9526 | 7000 | 1.1011 | 1.0473 | 0.9593 |
|
| 377 |
-
| 2.2315 | 8000 | 1.0401 | 1.0429 | 0.9575 |
|
| 378 |
-
| 2.5105 | 9000 | 1.0196 | 1.0377 | 0.9599 |
|
| 379 |
-
| 2.7894 | 10000 | 0.9939 | 1.0395 | 0.9592 |
|
| 380 |
-
| 3.0683 | 11000 | 0.9579 | 1.0249 | 0.9612 |
|
| 381 |
-
| 3.3473 | 12000 | 0.9437 | 1.0262 | 0.9600 |
|
| 382 |
-
| 3.6262 | 13000 | 0.9279 | 1.0129 | 0.9618 |
|
| 383 |
-
| 3.9052 | 14000 | 0.9184 | 1.0108 | 0.9621 |
|
| 384 |
-
| 4.1841 | 15000 | 0.8807 | 1.0071 | 0.9614 |
|
| 385 |
| 4.4630 | 16000 | 0.8802 | 1.0004 | 0.9613 |
|
| 386 |
| 4.7420 | 17000 | 0.8752 | 1.0061 | 0.9617 |
|
| 387 |
| 5.0209 | 18000 | 0.8628 | 1.0004 | 0.9629 |
|
|
|
|
| 366 |
### Training Logs
|
| 367 |
| Epoch | Step | Training Loss | Validation Loss | cosine_accuracy |
|
| 368 |
|:------:|:-----:|:-------------:|:---------------:|:---------------:|
|
| 369 |
+
| 4.1841 | 15000 | 0.8884 | 1.0071 | 0.9614 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 370 |
| 4.4630 | 16000 | 0.8802 | 1.0004 | 0.9613 |
|
| 371 |
| 4.7420 | 17000 | 0.8752 | 1.0061 | 0.9617 |
|
| 372 |
| 5.0209 | 18000 | 0.8628 | 1.0004 | 0.9629 |
|
checkpoint-21510/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f1219bd9031f06886018f52413a10fcb6ac47375db53e4b9ff515c0e7adf9966
|
| 3 |
size 14244
|
checkpoint-21510/trainer_state.json
CHANGED
|
@@ -244,16 +244,16 @@
|
|
| 244 |
"epoch": 4.184100418410042,
|
| 245 |
"grad_norm": 4.519611358642578,
|
| 246 |
"learning_rate": 5.964002440512509e-06,
|
| 247 |
-
"loss": 0.
|
| 248 |
"step": 15000
|
| 249 |
},
|
| 250 |
{
|
| 251 |
"epoch": 4.184100418410042,
|
| 252 |
"eval_cosine_accuracy": 0.9614049792289734,
|
| 253 |
"eval_loss": 1.0071080923080444,
|
| 254 |
-
"eval_runtime":
|
| 255 |
-
"eval_samples_per_second":
|
| 256 |
-
"eval_steps_per_second": 3.
|
| 257 |
"step": 15000
|
| 258 |
},
|
| 259 |
{
|
|
@@ -267,9 +267,9 @@
|
|
| 267 |
"epoch": 4.463040446304045,
|
| 268 |
"eval_cosine_accuracy": 0.9612998366355896,
|
| 269 |
"eval_loss": 1.0004464387893677,
|
| 270 |
-
"eval_runtime":
|
| 271 |
-
"eval_samples_per_second":
|
| 272 |
-
"eval_steps_per_second": 3.
|
| 273 |
"step": 16000
|
| 274 |
},
|
| 275 |
{
|
|
@@ -283,9 +283,9 @@
|
|
| 283 |
"epoch": 4.741980474198048,
|
| 284 |
"eval_cosine_accuracy": 0.9617204666137695,
|
| 285 |
"eval_loss": 1.0060843229293823,
|
| 286 |
-
"eval_runtime":
|
| 287 |
-
"eval_samples_per_second":
|
| 288 |
-
"eval_steps_per_second": 3.
|
| 289 |
"step": 17000
|
| 290 |
},
|
| 291 |
{
|
|
@@ -299,9 +299,9 @@
|
|
| 299 |
"epoch": 5.02092050209205,
|
| 300 |
"eval_cosine_accuracy": 0.9628772735595703,
|
| 301 |
"eval_loss": 1.0004209280014038,
|
| 302 |
-
"eval_runtime":
|
| 303 |
-
"eval_samples_per_second":
|
| 304 |
-
"eval_steps_per_second": 3.
|
| 305 |
"step": 18000
|
| 306 |
},
|
| 307 |
{
|
|
@@ -315,9 +315,9 @@
|
|
| 315 |
"epoch": 5.299860529986053,
|
| 316 |
"eval_cosine_accuracy": 0.9621411561965942,
|
| 317 |
"eval_loss": 1.0009299516677856,
|
| 318 |
-
"eval_runtime":
|
| 319 |
-
"eval_samples_per_second":
|
| 320 |
-
"eval_steps_per_second": 3.
|
| 321 |
"step": 19000
|
| 322 |
},
|
| 323 |
{
|
|
@@ -331,9 +331,9 @@
|
|
| 331 |
"epoch": 5.578800557880056,
|
| 332 |
"eval_cosine_accuracy": 0.9630876183509827,
|
| 333 |
"eval_loss": 0.9901958703994751,
|
| 334 |
-
"eval_runtime": 22.
|
| 335 |
-
"eval_samples_per_second":
|
| 336 |
-
"eval_steps_per_second": 3.
|
| 337 |
"step": 20000
|
| 338 |
},
|
| 339 |
{
|
|
@@ -347,9 +347,9 @@
|
|
| 347 |
"epoch": 5.857740585774058,
|
| 348 |
"eval_cosine_accuracy": 0.9632979035377502,
|
| 349 |
"eval_loss": 0.992326557636261,
|
| 350 |
-
"eval_runtime":
|
| 351 |
-
"eval_samples_per_second":
|
| 352 |
-
"eval_steps_per_second": 3.
|
| 353 |
"step": 21000
|
| 354 |
}
|
| 355 |
],
|
|
|
|
| 244 |
"epoch": 4.184100418410042,
|
| 245 |
"grad_norm": 4.519611358642578,
|
| 246 |
"learning_rate": 5.964002440512509e-06,
|
| 247 |
+
"loss": 0.8884,
|
| 248 |
"step": 15000
|
| 249 |
},
|
| 250 |
{
|
| 251 |
"epoch": 4.184100418410042,
|
| 252 |
"eval_cosine_accuracy": 0.9614049792289734,
|
| 253 |
"eval_loss": 1.0071080923080444,
|
| 254 |
+
"eval_runtime": 21.2593,
|
| 255 |
+
"eval_samples_per_second": 447.287,
|
| 256 |
+
"eval_steps_per_second": 3.528,
|
| 257 |
"step": 15000
|
| 258 |
},
|
| 259 |
{
|
|
|
|
| 267 |
"epoch": 4.463040446304045,
|
| 268 |
"eval_cosine_accuracy": 0.9612998366355896,
|
| 269 |
"eval_loss": 1.0004464387893677,
|
| 270 |
+
"eval_runtime": 21.271,
|
| 271 |
+
"eval_samples_per_second": 447.04,
|
| 272 |
+
"eval_steps_per_second": 3.526,
|
| 273 |
"step": 16000
|
| 274 |
},
|
| 275 |
{
|
|
|
|
| 283 |
"epoch": 4.741980474198048,
|
| 284 |
"eval_cosine_accuracy": 0.9617204666137695,
|
| 285 |
"eval_loss": 1.0060843229293823,
|
| 286 |
+
"eval_runtime": 21.4196,
|
| 287 |
+
"eval_samples_per_second": 443.939,
|
| 288 |
+
"eval_steps_per_second": 3.501,
|
| 289 |
"step": 17000
|
| 290 |
},
|
| 291 |
{
|
|
|
|
| 299 |
"epoch": 5.02092050209205,
|
| 300 |
"eval_cosine_accuracy": 0.9628772735595703,
|
| 301 |
"eval_loss": 1.0004209280014038,
|
| 302 |
+
"eval_runtime": 21.3936,
|
| 303 |
+
"eval_samples_per_second": 444.479,
|
| 304 |
+
"eval_steps_per_second": 3.506,
|
| 305 |
"step": 18000
|
| 306 |
},
|
| 307 |
{
|
|
|
|
| 315 |
"epoch": 5.299860529986053,
|
| 316 |
"eval_cosine_accuracy": 0.9621411561965942,
|
| 317 |
"eval_loss": 1.0009299516677856,
|
| 318 |
+
"eval_runtime": 21.2856,
|
| 319 |
+
"eval_samples_per_second": 446.733,
|
| 320 |
+
"eval_steps_per_second": 3.524,
|
| 321 |
"step": 19000
|
| 322 |
},
|
| 323 |
{
|
|
|
|
| 331 |
"epoch": 5.578800557880056,
|
| 332 |
"eval_cosine_accuracy": 0.9630876183509827,
|
| 333 |
"eval_loss": 0.9901958703994751,
|
| 334 |
+
"eval_runtime": 22.6062,
|
| 335 |
+
"eval_samples_per_second": 420.636,
|
| 336 |
+
"eval_steps_per_second": 3.318,
|
| 337 |
"step": 20000
|
| 338 |
},
|
| 339 |
{
|
|
|
|
| 347 |
"epoch": 5.857740585774058,
|
| 348 |
"eval_cosine_accuracy": 0.9632979035377502,
|
| 349 |
"eval_loss": 0.992326557636261,
|
| 350 |
+
"eval_runtime": 21.2926,
|
| 351 |
+
"eval_samples_per_second": 446.587,
|
| 352 |
+
"eval_steps_per_second": 3.522,
|
| 353 |
"step": 21000
|
| 354 |
}
|
| 355 |
],
|
checkpoint-21510/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5752
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:93b1607a15af70cf6b32a232aa87f4ee1d3038e5a70922f63dc141b69792d409
|
| 3 |
size 5752
|