Upload trainer_state.json with huggingface_hub
Browse files- trainer_state.json +30 -30
trainer_state.json
CHANGED
|
@@ -48,9 +48,9 @@
|
|
| 48 |
"epoch": 1.0416666666666667,
|
| 49 |
"eval_accuracy": 0.4625,
|
| 50 |
"eval_loss": 1.135870099067688,
|
| 51 |
-
"eval_runtime": 1.
|
| 52 |
-
"eval_samples_per_second":
|
| 53 |
-
"eval_steps_per_second": 7.
|
| 54 |
"step": 50
|
| 55 |
},
|
| 56 |
{
|
|
@@ -92,9 +92,9 @@
|
|
| 92 |
"epoch": 2.0833333333333335,
|
| 93 |
"eval_accuracy": 0.725,
|
| 94 |
"eval_loss": 1.0034687519073486,
|
| 95 |
-
"eval_runtime": 1.
|
| 96 |
-
"eval_samples_per_second":
|
| 97 |
-
"eval_steps_per_second": 7.
|
| 98 |
"step": 100
|
| 99 |
},
|
| 100 |
{
|
|
@@ -136,9 +136,9 @@
|
|
| 136 |
"epoch": 3.125,
|
| 137 |
"eval_accuracy": 0.7625,
|
| 138 |
"eval_loss": 0.8992247581481934,
|
| 139 |
-
"eval_runtime": 1.
|
| 140 |
-
"eval_samples_per_second":
|
| 141 |
-
"eval_steps_per_second":
|
| 142 |
"step": 150
|
| 143 |
},
|
| 144 |
{
|
|
@@ -180,9 +180,9 @@
|
|
| 180 |
"epoch": 4.166666666666667,
|
| 181 |
"eval_accuracy": 0.8,
|
| 182 |
"eval_loss": 0.7827270030975342,
|
| 183 |
-
"eval_runtime": 1.
|
| 184 |
-
"eval_samples_per_second":
|
| 185 |
-
"eval_steps_per_second":
|
| 186 |
"step": 200
|
| 187 |
},
|
| 188 |
{
|
|
@@ -224,9 +224,9 @@
|
|
| 224 |
"epoch": 5.208333333333333,
|
| 225 |
"eval_accuracy": 0.8125,
|
| 226 |
"eval_loss": 0.6670618653297424,
|
| 227 |
-
"eval_runtime": 1.
|
| 228 |
-
"eval_samples_per_second":
|
| 229 |
-
"eval_steps_per_second": 7.
|
| 230 |
"step": 250
|
| 231 |
},
|
| 232 |
{
|
|
@@ -268,9 +268,9 @@
|
|
| 268 |
"epoch": 6.25,
|
| 269 |
"eval_accuracy": 0.725,
|
| 270 |
"eval_loss": 0.6925244927406311,
|
| 271 |
-
"eval_runtime": 1.
|
| 272 |
-
"eval_samples_per_second": 60.
|
| 273 |
-
"eval_steps_per_second": 7.
|
| 274 |
"step": 300
|
| 275 |
},
|
| 276 |
{
|
|
@@ -312,9 +312,9 @@
|
|
| 312 |
"epoch": 7.291666666666667,
|
| 313 |
"eval_accuracy": 0.8125,
|
| 314 |
"eval_loss": 0.5735878348350525,
|
| 315 |
-
"eval_runtime": 1.
|
| 316 |
-
"eval_samples_per_second":
|
| 317 |
-
"eval_steps_per_second": 7.
|
| 318 |
"step": 350
|
| 319 |
},
|
| 320 |
{
|
|
@@ -356,9 +356,9 @@
|
|
| 356 |
"epoch": 8.333333333333334,
|
| 357 |
"eval_accuracy": 0.8125,
|
| 358 |
"eval_loss": 0.5838413238525391,
|
| 359 |
-
"eval_runtime": 1.
|
| 360 |
-
"eval_samples_per_second":
|
| 361 |
-
"eval_steps_per_second": 7.
|
| 362 |
"step": 400
|
| 363 |
},
|
| 364 |
{
|
|
@@ -400,9 +400,9 @@
|
|
| 400 |
"epoch": 9.375,
|
| 401 |
"eval_accuracy": 0.8125,
|
| 402 |
"eval_loss": 0.570094108581543,
|
| 403 |
-
"eval_runtime": 1.
|
| 404 |
-
"eval_samples_per_second":
|
| 405 |
-
"eval_steps_per_second": 7.
|
| 406 |
"step": 450
|
| 407 |
},
|
| 408 |
{
|
|
@@ -431,9 +431,9 @@
|
|
| 431 |
"step": 480,
|
| 432 |
"total_flos": 5.866248766604083e+17,
|
| 433 |
"train_loss": 0.5593519407014053,
|
| 434 |
-
"train_runtime":
|
| 435 |
-
"train_samples_per_second": 22.
|
| 436 |
-
"train_steps_per_second": 1.
|
| 437 |
}
|
| 438 |
],
|
| 439 |
"logging_steps": 10,
|
|
|
|
| 48 |
"epoch": 1.0416666666666667,
|
| 49 |
"eval_accuracy": 0.4625,
|
| 50 |
"eval_loss": 1.135870099067688,
|
| 51 |
+
"eval_runtime": 1.2659,
|
| 52 |
+
"eval_samples_per_second": 63.197,
|
| 53 |
+
"eval_steps_per_second": 7.9,
|
| 54 |
"step": 50
|
| 55 |
},
|
| 56 |
{
|
|
|
|
| 92 |
"epoch": 2.0833333333333335,
|
| 93 |
"eval_accuracy": 0.725,
|
| 94 |
"eval_loss": 1.0034687519073486,
|
| 95 |
+
"eval_runtime": 1.2636,
|
| 96 |
+
"eval_samples_per_second": 63.309,
|
| 97 |
+
"eval_steps_per_second": 7.914,
|
| 98 |
"step": 100
|
| 99 |
},
|
| 100 |
{
|
|
|
|
| 136 |
"epoch": 3.125,
|
| 137 |
"eval_accuracy": 0.7625,
|
| 138 |
"eval_loss": 0.8992247581481934,
|
| 139 |
+
"eval_runtime": 1.2311,
|
| 140 |
+
"eval_samples_per_second": 64.981,
|
| 141 |
+
"eval_steps_per_second": 8.123,
|
| 142 |
"step": 150
|
| 143 |
},
|
| 144 |
{
|
|
|
|
| 180 |
"epoch": 4.166666666666667,
|
| 181 |
"eval_accuracy": 0.8,
|
| 182 |
"eval_loss": 0.7827270030975342,
|
| 183 |
+
"eval_runtime": 1.2396,
|
| 184 |
+
"eval_samples_per_second": 64.536,
|
| 185 |
+
"eval_steps_per_second": 8.067,
|
| 186 |
"step": 200
|
| 187 |
},
|
| 188 |
{
|
|
|
|
| 224 |
"epoch": 5.208333333333333,
|
| 225 |
"eval_accuracy": 0.8125,
|
| 226 |
"eval_loss": 0.6670618653297424,
|
| 227 |
+
"eval_runtime": 1.2584,
|
| 228 |
+
"eval_samples_per_second": 63.572,
|
| 229 |
+
"eval_steps_per_second": 7.946,
|
| 230 |
"step": 250
|
| 231 |
},
|
| 232 |
{
|
|
|
|
| 268 |
"epoch": 6.25,
|
| 269 |
"eval_accuracy": 0.725,
|
| 270 |
"eval_loss": 0.6925244927406311,
|
| 271 |
+
"eval_runtime": 1.3296,
|
| 272 |
+
"eval_samples_per_second": 60.169,
|
| 273 |
+
"eval_steps_per_second": 7.521,
|
| 274 |
"step": 300
|
| 275 |
},
|
| 276 |
{
|
|
|
|
| 312 |
"epoch": 7.291666666666667,
|
| 313 |
"eval_accuracy": 0.8125,
|
| 314 |
"eval_loss": 0.5735878348350525,
|
| 315 |
+
"eval_runtime": 1.2591,
|
| 316 |
+
"eval_samples_per_second": 63.536,
|
| 317 |
+
"eval_steps_per_second": 7.942,
|
| 318 |
"step": 350
|
| 319 |
},
|
| 320 |
{
|
|
|
|
| 356 |
"epoch": 8.333333333333334,
|
| 357 |
"eval_accuracy": 0.8125,
|
| 358 |
"eval_loss": 0.5838413238525391,
|
| 359 |
+
"eval_runtime": 1.3799,
|
| 360 |
+
"eval_samples_per_second": 57.977,
|
| 361 |
+
"eval_steps_per_second": 7.247,
|
| 362 |
"step": 400
|
| 363 |
},
|
| 364 |
{
|
|
|
|
| 400 |
"epoch": 9.375,
|
| 401 |
"eval_accuracy": 0.8125,
|
| 402 |
"eval_loss": 0.570094108581543,
|
| 403 |
+
"eval_runtime": 1.4058,
|
| 404 |
+
"eval_samples_per_second": 56.905,
|
| 405 |
+
"eval_steps_per_second": 7.113,
|
| 406 |
"step": 450
|
| 407 |
},
|
| 408 |
{
|
|
|
|
| 431 |
"step": 480,
|
| 432 |
"total_flos": 5.866248766604083e+17,
|
| 433 |
"train_loss": 0.5593519407014053,
|
| 434 |
+
"train_runtime": 335.8262,
|
| 435 |
+
"train_samples_per_second": 22.541,
|
| 436 |
+
"train_steps_per_second": 1.429
|
| 437 |
}
|
| 438 |
],
|
| 439 |
"logging_steps": 10,
|