End of training
Browse files- all_results.json +6 -6
- eval_results.json +3 -3
- train_results.json +3 -3
- trainer_state.json +18 -18
all_results.json
CHANGED
|
@@ -2,11 +2,11 @@
|
|
| 2 |
"epoch": 4.99,
|
| 3 |
"eval_accuracy": 0.9984836997725549,
|
| 4 |
"eval_loss": 0.00841992162168026,
|
| 5 |
-
"eval_runtime": 3.
|
| 6 |
-
"eval_samples_per_second":
|
| 7 |
-
"eval_steps_per_second": 21.
|
| 8 |
"train_loss": 0.07950978533641712,
|
| 9 |
-
"train_runtime":
|
| 10 |
-
"train_samples_per_second":
|
| 11 |
-
"train_steps_per_second":
|
| 12 |
}
|
|
|
|
| 2 |
"epoch": 4.99,
|
| 3 |
"eval_accuracy": 0.9984836997725549,
|
| 4 |
"eval_loss": 0.00841992162168026,
|
| 5 |
+
"eval_runtime": 3.7799,
|
| 6 |
+
"eval_samples_per_second": 697.905,
|
| 7 |
+
"eval_steps_per_second": 21.958,
|
| 8 |
"train_loss": 0.07950978533641712,
|
| 9 |
+
"train_runtime": 480.1024,
|
| 10 |
+
"train_samples_per_second": 247.218,
|
| 11 |
+
"train_steps_per_second": 1.927
|
| 12 |
}
|
eval_results.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
| 2 |
"epoch": 4.99,
|
| 3 |
"eval_accuracy": 0.9984836997725549,
|
| 4 |
"eval_loss": 0.00841992162168026,
|
| 5 |
-
"eval_runtime": 3.
|
| 6 |
-
"eval_samples_per_second":
|
| 7 |
-
"eval_steps_per_second": 21.
|
| 8 |
}
|
|
|
|
| 2 |
"epoch": 4.99,
|
| 3 |
"eval_accuracy": 0.9984836997725549,
|
| 4 |
"eval_loss": 0.00841992162168026,
|
| 5 |
+
"eval_runtime": 3.7799,
|
| 6 |
+
"eval_samples_per_second": 697.905,
|
| 7 |
+
"eval_steps_per_second": 21.958
|
| 8 |
}
|
train_results.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"epoch": 4.99,
|
| 3 |
"train_loss": 0.07950978533641712,
|
| 4 |
-
"train_runtime":
|
| 5 |
-
"train_samples_per_second":
|
| 6 |
-
"train_steps_per_second":
|
| 7 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"epoch": 4.99,
|
| 3 |
"train_loss": 0.07950978533641712,
|
| 4 |
+
"train_runtime": 480.1024,
|
| 5 |
+
"train_samples_per_second": 247.218,
|
| 6 |
+
"train_steps_per_second": 1.927
|
| 7 |
}
|
trainer_state.json
CHANGED
|
@@ -119,9 +119,9 @@
|
|
| 119 |
"epoch": 1.0,
|
| 120 |
"eval_accuracy": 0.9844579226686884,
|
| 121 |
"eval_loss": 0.056594911962747574,
|
| 122 |
-
"eval_runtime": 3.
|
| 123 |
-
"eval_samples_per_second":
|
| 124 |
-
"eval_steps_per_second": 20.
|
| 125 |
"step": 185
|
| 126 |
},
|
| 127 |
{
|
|
@@ -242,9 +242,9 @@
|
|
| 242 |
"epoch": 2.0,
|
| 243 |
"eval_accuracy": 0.9909021986353298,
|
| 244 |
"eval_loss": 0.04098781570792198,
|
| 245 |
-
"eval_runtime": 3.
|
| 246 |
-
"eval_samples_per_second":
|
| 247 |
-
"eval_steps_per_second": 21.
|
| 248 |
"step": 371
|
| 249 |
},
|
| 250 |
{
|
|
@@ -359,9 +359,9 @@
|
|
| 359 |
"epoch": 3.0,
|
| 360 |
"eval_accuracy": 0.9901440485216073,
|
| 361 |
"eval_loss": 0.04609154537320137,
|
| 362 |
-
"eval_runtime": 3.
|
| 363 |
-
"eval_samples_per_second":
|
| 364 |
-
"eval_steps_per_second": 21.
|
| 365 |
"step": 556
|
| 366 |
},
|
| 367 |
{
|
|
@@ -482,9 +482,9 @@
|
|
| 482 |
"epoch": 4.0,
|
| 483 |
"eval_accuracy": 0.9984836997725549,
|
| 484 |
"eval_loss": 0.00841992162168026,
|
| 485 |
-
"eval_runtime": 3.
|
| 486 |
-
"eval_samples_per_second":
|
| 487 |
-
"eval_steps_per_second": 21.
|
| 488 |
"step": 742
|
| 489 |
},
|
| 490 |
{
|
|
@@ -599,9 +599,9 @@
|
|
| 599 |
"epoch": 4.99,
|
| 600 |
"eval_accuracy": 0.9984836997725549,
|
| 601 |
"eval_loss": 0.008393567055463791,
|
| 602 |
-
"eval_runtime": 3.
|
| 603 |
-
"eval_samples_per_second":
|
| 604 |
-
"eval_steps_per_second": 21.
|
| 605 |
"step": 925
|
| 606 |
},
|
| 607 |
{
|
|
@@ -609,9 +609,9 @@
|
|
| 609 |
"step": 925,
|
| 610 |
"total_flos": 2.0985718924353213e+18,
|
| 611 |
"train_loss": 0.07950978533641712,
|
| 612 |
-
"train_runtime":
|
| 613 |
-
"train_samples_per_second":
|
| 614 |
-
"train_steps_per_second":
|
| 615 |
}
|
| 616 |
],
|
| 617 |
"max_steps": 925,
|
|
|
|
| 119 |
"epoch": 1.0,
|
| 120 |
"eval_accuracy": 0.9844579226686884,
|
| 121 |
"eval_loss": 0.056594911962747574,
|
| 122 |
+
"eval_runtime": 3.9857,
|
| 123 |
+
"eval_samples_per_second": 661.869,
|
| 124 |
+
"eval_steps_per_second": 20.825,
|
| 125 |
"step": 185
|
| 126 |
},
|
| 127 |
{
|
|
|
|
| 242 |
"epoch": 2.0,
|
| 243 |
"eval_accuracy": 0.9909021986353298,
|
| 244 |
"eval_loss": 0.04098781570792198,
|
| 245 |
+
"eval_runtime": 3.7805,
|
| 246 |
+
"eval_samples_per_second": 697.785,
|
| 247 |
+
"eval_steps_per_second": 21.955,
|
| 248 |
"step": 371
|
| 249 |
},
|
| 250 |
{
|
|
|
|
| 359 |
"epoch": 3.0,
|
| 360 |
"eval_accuracy": 0.9901440485216073,
|
| 361 |
"eval_loss": 0.04609154537320137,
|
| 362 |
+
"eval_runtime": 3.7832,
|
| 363 |
+
"eval_samples_per_second": 697.296,
|
| 364 |
+
"eval_steps_per_second": 21.939,
|
| 365 |
"step": 556
|
| 366 |
},
|
| 367 |
{
|
|
|
|
| 482 |
"epoch": 4.0,
|
| 483 |
"eval_accuracy": 0.9984836997725549,
|
| 484 |
"eval_loss": 0.00841992162168026,
|
| 485 |
+
"eval_runtime": 3.7804,
|
| 486 |
+
"eval_samples_per_second": 697.802,
|
| 487 |
+
"eval_steps_per_second": 21.955,
|
| 488 |
"step": 742
|
| 489 |
},
|
| 490 |
{
|
|
|
|
| 599 |
"epoch": 4.99,
|
| 600 |
"eval_accuracy": 0.9984836997725549,
|
| 601 |
"eval_loss": 0.008393567055463791,
|
| 602 |
+
"eval_runtime": 3.7999,
|
| 603 |
+
"eval_samples_per_second": 694.227,
|
| 604 |
+
"eval_steps_per_second": 21.843,
|
| 605 |
"step": 925
|
| 606 |
},
|
| 607 |
{
|
|
|
|
| 609 |
"step": 925,
|
| 610 |
"total_flos": 2.0985718924353213e+18,
|
| 611 |
"train_loss": 0.07950978533641712,
|
| 612 |
+
"train_runtime": 480.1024,
|
| 613 |
+
"train_samples_per_second": 247.218,
|
| 614 |
+
"train_steps_per_second": 1.927
|
| 615 |
}
|
| 616 |
],
|
| 617 |
"max_steps": 925,
|