Training in progress, epoch 3, checkpoint
Browse files
checkpoint-11032/trainer_state.json
CHANGED
|
@@ -27,9 +27,9 @@
|
|
| 27 |
"epoch": 0.36258158085569253,
|
| 28 |
"eval_cosine_accuracy": 0.9711852073669434,
|
| 29 |
"eval_loss": 0.3902958035469055,
|
| 30 |
-
"eval_runtime": 34.
|
| 31 |
-
"eval_samples_per_second":
|
| 32 |
-
"eval_steps_per_second": 1.
|
| 33 |
"step": 1000
|
| 34 |
},
|
| 35 |
{
|
|
@@ -43,9 +43,9 @@
|
|
| 43 |
"epoch": 0.7251631617113851,
|
| 44 |
"eval_cosine_accuracy": 0.9699232578277588,
|
| 45 |
"eval_loss": 0.37875595688819885,
|
| 46 |
-
"eval_runtime":
|
| 47 |
-
"eval_samples_per_second":
|
| 48 |
-
"eval_steps_per_second": 1.
|
| 49 |
"step": 2000
|
| 50 |
},
|
| 51 |
{
|
|
@@ -59,9 +59,9 @@
|
|
| 59 |
"epoch": 1.0876811594202898,
|
| 60 |
"eval_cosine_accuracy": 0.9716058373451233,
|
| 61 |
"eval_loss": 0.3854234516620636,
|
| 62 |
-
"eval_runtime": 34.
|
| 63 |
-
"eval_samples_per_second":
|
| 64 |
-
"eval_steps_per_second": 1.
|
| 65 |
"step": 3000
|
| 66 |
},
|
| 67 |
{
|
|
@@ -75,9 +75,9 @@
|
|
| 75 |
"epoch": 1.45,
|
| 76 |
"eval_cosine_accuracy": 0.9708697199821472,
|
| 77 |
"eval_loss": 0.3814031183719635,
|
| 78 |
-
"eval_runtime":
|
| 79 |
-
"eval_samples_per_second":
|
| 80 |
-
"eval_steps_per_second": 1.
|
| 81 |
"step": 4000
|
| 82 |
},
|
| 83 |
{
|
|
@@ -91,9 +91,9 @@
|
|
| 91 |
"epoch": 1.8123188405797102,
|
| 92 |
"eval_cosine_accuracy": 0.9710800051689148,
|
| 93 |
"eval_loss": 0.38102903962135315,
|
| 94 |
-
"eval_runtime":
|
| 95 |
-
"eval_samples_per_second":
|
| 96 |
-
"eval_steps_per_second": 1.
|
| 97 |
"step": 5000
|
| 98 |
},
|
| 99 |
{
|
|
@@ -107,9 +107,9 @@
|
|
| 107 |
"epoch": 2.17463768115942,
|
| 108 |
"eval_cosine_accuracy": 0.9716058373451233,
|
| 109 |
"eval_loss": 0.38304567337036133,
|
| 110 |
-
"eval_runtime": 34.
|
| 111 |
-
"eval_samples_per_second":
|
| 112 |
-
"eval_steps_per_second": 1.
|
| 113 |
"step": 6000
|
| 114 |
},
|
| 115 |
{
|
|
@@ -123,9 +123,9 @@
|
|
| 123 |
"epoch": 2.5369565217391306,
|
| 124 |
"eval_cosine_accuracy": 0.9721316695213318,
|
| 125 |
"eval_loss": 0.3812798261642456,
|
| 126 |
-
"eval_runtime":
|
| 127 |
-
"eval_samples_per_second":
|
| 128 |
-
"eval_steps_per_second": 1.
|
| 129 |
"step": 7000
|
| 130 |
},
|
| 131 |
{
|
|
@@ -139,9 +139,9 @@
|
|
| 139 |
"epoch": 2.8992753623188405,
|
| 140 |
"eval_cosine_accuracy": 0.9715006947517395,
|
| 141 |
"eval_loss": 0.38419124484062195,
|
| 142 |
-
"eval_runtime":
|
| 143 |
-
"eval_samples_per_second":
|
| 144 |
-
"eval_steps_per_second": 1.
|
| 145 |
"step": 8000
|
| 146 |
},
|
| 147 |
{
|
|
@@ -155,9 +155,9 @@
|
|
| 155 |
"epoch": 3.261594202898551,
|
| 156 |
"eval_cosine_accuracy": 0.9712903499603271,
|
| 157 |
"eval_loss": 0.3847886919975281,
|
| 158 |
-
"eval_runtime":
|
| 159 |
-
"eval_samples_per_second":
|
| 160 |
-
"eval_steps_per_second": 1.
|
| 161 |
"step": 9000
|
| 162 |
},
|
| 163 |
{
|
|
@@ -171,9 +171,9 @@
|
|
| 171 |
"epoch": 3.623913043478261,
|
| 172 |
"eval_cosine_accuracy": 0.970974862575531,
|
| 173 |
"eval_loss": 0.3841721713542938,
|
| 174 |
-
"eval_runtime":
|
| 175 |
-
"eval_samples_per_second":
|
| 176 |
-
"eval_steps_per_second": 1.
|
| 177 |
"step": 10000
|
| 178 |
},
|
| 179 |
{
|
|
@@ -187,9 +187,9 @@
|
|
| 187 |
"epoch": 3.986231884057971,
|
| 188 |
"eval_cosine_accuracy": 0.9705542325973511,
|
| 189 |
"eval_loss": 0.38476327061653137,
|
| 190 |
-
"eval_runtime":
|
| 191 |
-
"eval_samples_per_second":
|
| 192 |
-
"eval_steps_per_second": 1.
|
| 193 |
"step": 11000
|
| 194 |
}
|
| 195 |
],
|
|
|
|
| 27 |
"epoch": 0.36258158085569253,
|
| 28 |
"eval_cosine_accuracy": 0.9711852073669434,
|
| 29 |
"eval_loss": 0.3902958035469055,
|
| 30 |
+
"eval_runtime": 34.6603,
|
| 31 |
+
"eval_samples_per_second": 274.348,
|
| 32 |
+
"eval_steps_per_second": 1.096,
|
| 33 |
"step": 1000
|
| 34 |
},
|
| 35 |
{
|
|
|
|
| 43 |
"epoch": 0.7251631617113851,
|
| 44 |
"eval_cosine_accuracy": 0.9699232578277588,
|
| 45 |
"eval_loss": 0.37875595688819885,
|
| 46 |
+
"eval_runtime": 33.7082,
|
| 47 |
+
"eval_samples_per_second": 282.098,
|
| 48 |
+
"eval_steps_per_second": 1.127,
|
| 49 |
"step": 2000
|
| 50 |
},
|
| 51 |
{
|
|
|
|
| 59 |
"epoch": 1.0876811594202898,
|
| 60 |
"eval_cosine_accuracy": 0.9716058373451233,
|
| 61 |
"eval_loss": 0.3854234516620636,
|
| 62 |
+
"eval_runtime": 34.2421,
|
| 63 |
+
"eval_samples_per_second": 277.699,
|
| 64 |
+
"eval_steps_per_second": 1.11,
|
| 65 |
"step": 3000
|
| 66 |
},
|
| 67 |
{
|
|
|
|
| 75 |
"epoch": 1.45,
|
| 76 |
"eval_cosine_accuracy": 0.9708697199821472,
|
| 77 |
"eval_loss": 0.3814031183719635,
|
| 78 |
+
"eval_runtime": 33.9647,
|
| 79 |
+
"eval_samples_per_second": 279.967,
|
| 80 |
+
"eval_steps_per_second": 1.119,
|
| 81 |
"step": 4000
|
| 82 |
},
|
| 83 |
{
|
|
|
|
| 91 |
"epoch": 1.8123188405797102,
|
| 92 |
"eval_cosine_accuracy": 0.9710800051689148,
|
| 93 |
"eval_loss": 0.38102903962135315,
|
| 94 |
+
"eval_runtime": 34.858,
|
| 95 |
+
"eval_samples_per_second": 272.792,
|
| 96 |
+
"eval_steps_per_second": 1.09,
|
| 97 |
"step": 5000
|
| 98 |
},
|
| 99 |
{
|
|
|
|
| 107 |
"epoch": 2.17463768115942,
|
| 108 |
"eval_cosine_accuracy": 0.9716058373451233,
|
| 109 |
"eval_loss": 0.38304567337036133,
|
| 110 |
+
"eval_runtime": 34.2523,
|
| 111 |
+
"eval_samples_per_second": 277.616,
|
| 112 |
+
"eval_steps_per_second": 1.109,
|
| 113 |
"step": 6000
|
| 114 |
},
|
| 115 |
{
|
|
|
|
| 123 |
"epoch": 2.5369565217391306,
|
| 124 |
"eval_cosine_accuracy": 0.9721316695213318,
|
| 125 |
"eval_loss": 0.3812798261642456,
|
| 126 |
+
"eval_runtime": 33.9373,
|
| 127 |
+
"eval_samples_per_second": 280.193,
|
| 128 |
+
"eval_steps_per_second": 1.12,
|
| 129 |
"step": 7000
|
| 130 |
},
|
| 131 |
{
|
|
|
|
| 139 |
"epoch": 2.8992753623188405,
|
| 140 |
"eval_cosine_accuracy": 0.9715006947517395,
|
| 141 |
"eval_loss": 0.38419124484062195,
|
| 142 |
+
"eval_runtime": 34.0546,
|
| 143 |
+
"eval_samples_per_second": 279.228,
|
| 144 |
+
"eval_steps_per_second": 1.116,
|
| 145 |
"step": 8000
|
| 146 |
},
|
| 147 |
{
|
|
|
|
| 155 |
"epoch": 3.261594202898551,
|
| 156 |
"eval_cosine_accuracy": 0.9712903499603271,
|
| 157 |
"eval_loss": 0.3847886919975281,
|
| 158 |
+
"eval_runtime": 34.006,
|
| 159 |
+
"eval_samples_per_second": 279.627,
|
| 160 |
+
"eval_steps_per_second": 1.117,
|
| 161 |
"step": 9000
|
| 162 |
},
|
| 163 |
{
|
|
|
|
| 171 |
"epoch": 3.623913043478261,
|
| 172 |
"eval_cosine_accuracy": 0.970974862575531,
|
| 173 |
"eval_loss": 0.3841721713542938,
|
| 174 |
+
"eval_runtime": 33.9641,
|
| 175 |
+
"eval_samples_per_second": 279.972,
|
| 176 |
+
"eval_steps_per_second": 1.119,
|
| 177 |
"step": 10000
|
| 178 |
},
|
| 179 |
{
|
|
|
|
| 187 |
"epoch": 3.986231884057971,
|
| 188 |
"eval_cosine_accuracy": 0.9705542325973511,
|
| 189 |
"eval_loss": 0.38476327061653137,
|
| 190 |
+
"eval_runtime": 34.6061,
|
| 191 |
+
"eval_samples_per_second": 274.778,
|
| 192 |
+
"eval_steps_per_second": 1.098,
|
| 193 |
"step": 11000
|
| 194 |
}
|
| 195 |
],
|
checkpoint-11032/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5752
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:98354e6a6ec8411cdce0c11c2d3311243c8acedb0b90e2b2c87806e8519cdf4b
|
| 3 |
size 5752
|