Training in progress, step 57000, checkpoint
Browse files- last-checkpoint/model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +294 -6
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 373077376
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0cae5bd40cebc93aa05562030f2b12652a8c928f29de2177774bdfb46d57e338
|
| 3 |
size 373077376
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 422377931
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3455cdb481c045d67e6c29cc19cbf512f3f4349a97202825124c73528f7b3652
|
| 3 |
size 422377931
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15365
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e189d953d56fbbb1dc48bf345790e84a3fa8ff54652aa62e6c6b85a7192fc179
|
| 3 |
size 15365
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15365
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f952436541ae47c1950b5a1b819228a6aa1f641c3a191645aa67b0892fe0b260
|
| 3 |
size 15365
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15365
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:71bb0fe2b2559162529fb3a1e66e184ec5cc1d927ba0e24ba8b4215d6d671a7b
|
| 3 |
size 15365
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15365
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cf14548f27ec2bb28d193492f3a62a0d7bf30afb378a1eaed2530adf64f04c79
|
| 3 |
size 15365
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1401
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cca6fb53f371a50c66a1841bfc607b1baa7b2a69fcea3747532bd4d0962b4499
|
| 3 |
size 1401
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"best_global_step":
|
| 3 |
-
"best_metric": 2.
|
| 4 |
-
"best_model_checkpoint": "./artifacts/models/pretrain-4gpu-8k-ckpt/checkpoint-
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 1000,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -16136,6 +16136,294 @@
|
|
| 16136 |
"eval_samples_per_second": 100.371,
|
| 16137 |
"eval_steps_per_second": 3.158,
|
| 16138 |
"step": 56000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16139 |
}
|
| 16140 |
],
|
| 16141 |
"logging_steps": 25,
|
|
@@ -16155,7 +16443,7 @@
|
|
| 16155 |
"attributes": {}
|
| 16156 |
}
|
| 16157 |
},
|
| 16158 |
-
"total_flos": 2.
|
| 16159 |
"train_batch_size": 16,
|
| 16160 |
"trial_name": null,
|
| 16161 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_global_step": 57000,
|
| 3 |
+
"best_metric": 2.5533201694488525,
|
| 4 |
+
"best_model_checkpoint": "./artifacts/models/pretrain-4gpu-8k-ckpt/checkpoint-57000",
|
| 5 |
+
"epoch": 0.9983535923214348,
|
| 6 |
"eval_steps": 1000,
|
| 7 |
+
"global_step": 57000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 16136 |
"eval_samples_per_second": 100.371,
|
| 16137 |
"eval_steps_per_second": 3.158,
|
| 16138 |
"step": 56000
|
| 16139 |
+
},
|
| 16140 |
+
{
|
| 16141 |
+
"epoch": 0.9812764914001472,
|
| 16142 |
+
"grad_norm": 0.2109375,
|
| 16143 |
+
"learning_rate": 9.59941896743366e-07,
|
| 16144 |
+
"loss": 2.5361,
|
| 16145 |
+
"step": 56025
|
| 16146 |
+
},
|
| 16147 |
+
{
|
| 16148 |
+
"epoch": 0.9817143657827443,
|
| 16149 |
+
"grad_norm": 0.20703125,
|
| 16150 |
+
"learning_rate": 9.156223661438423e-07,
|
| 16151 |
+
"loss": 2.5396,
|
| 16152 |
+
"step": 56050
|
| 16153 |
+
},
|
| 16154 |
+
{
|
| 16155 |
+
"epoch": 0.9821522401653414,
|
| 16156 |
+
"grad_norm": 0.212890625,
|
| 16157 |
+
"learning_rate": 8.723493140556648e-07,
|
| 16158 |
+
"loss": 2.5361,
|
| 16159 |
+
"step": 56075
|
| 16160 |
+
},
|
| 16161 |
+
{
|
| 16162 |
+
"epoch": 0.9825901145479385,
|
| 16163 |
+
"grad_norm": 0.203125,
|
| 16164 |
+
"learning_rate": 8.301228312136422e-07,
|
| 16165 |
+
"loss": 2.5437,
|
| 16166 |
+
"step": 56100
|
| 16167 |
+
},
|
| 16168 |
+
{
|
| 16169 |
+
"epoch": 0.9830279889305356,
|
| 16170 |
+
"grad_norm": 0.2177734375,
|
| 16171 |
+
"learning_rate": 7.88943006158116e-07,
|
| 16172 |
+
"loss": 2.5395,
|
| 16173 |
+
"step": 56125
|
| 16174 |
+
},
|
| 16175 |
+
{
|
| 16176 |
+
"epoch": 0.9834658633131327,
|
| 16177 |
+
"grad_norm": 0.21875,
|
| 16178 |
+
"learning_rate": 7.488099252347946e-07,
|
| 16179 |
+
"loss": 2.5497,
|
| 16180 |
+
"step": 56150
|
| 16181 |
+
},
|
| 16182 |
+
{
|
| 16183 |
+
"epoch": 0.9839037376957298,
|
| 16184 |
+
"grad_norm": 0.21484375,
|
| 16185 |
+
"learning_rate": 7.097236725945866e-07,
|
| 16186 |
+
"loss": 2.5393,
|
| 16187 |
+
"step": 56175
|
| 16188 |
+
},
|
| 16189 |
+
{
|
| 16190 |
+
"epoch": 0.9843416120783269,
|
| 16191 |
+
"grad_norm": 0.212890625,
|
| 16192 |
+
"learning_rate": 6.716843301934894e-07,
|
| 16193 |
+
"loss": 2.5412,
|
| 16194 |
+
"step": 56200
|
| 16195 |
+
},
|
| 16196 |
+
{
|
| 16197 |
+
"epoch": 0.9847794864609241,
|
| 16198 |
+
"grad_norm": 0.2138671875,
|
| 16199 |
+
"learning_rate": 6.346919777922011e-07,
|
| 16200 |
+
"loss": 2.546,
|
| 16201 |
+
"step": 56225
|
| 16202 |
+
},
|
| 16203 |
+
{
|
| 16204 |
+
"epoch": 0.9852173608435212,
|
| 16205 |
+
"grad_norm": 0.212890625,
|
| 16206 |
+
"learning_rate": 5.987466929561757e-07,
|
| 16207 |
+
"loss": 2.546,
|
| 16208 |
+
"step": 56250
|
| 16209 |
+
},
|
| 16210 |
+
{
|
| 16211 |
+
"epoch": 0.9856552352261183,
|
| 16212 |
+
"grad_norm": 0.2265625,
|
| 16213 |
+
"learning_rate": 5.638485510554014e-07,
|
| 16214 |
+
"loss": 2.5372,
|
| 16215 |
+
"step": 56275
|
| 16216 |
+
},
|
| 16217 |
+
{
|
| 16218 |
+
"epoch": 0.9860931096087154,
|
| 16219 |
+
"grad_norm": 0.2197265625,
|
| 16220 |
+
"learning_rate": 5.29997625264178e-07,
|
| 16221 |
+
"loss": 2.5395,
|
| 16222 |
+
"step": 56300
|
| 16223 |
+
},
|
| 16224 |
+
{
|
| 16225 |
+
"epoch": 0.9865309839913126,
|
| 16226 |
+
"grad_norm": 0.255859375,
|
| 16227 |
+
"learning_rate": 4.971939865610064e-07,
|
| 16228 |
+
"loss": 2.5328,
|
| 16229 |
+
"step": 56325
|
| 16230 |
+
},
|
| 16231 |
+
{
|
| 16232 |
+
"epoch": 0.9869688583739097,
|
| 16233 |
+
"grad_norm": 0.2255859375,
|
| 16234 |
+
"learning_rate": 4.654377037284774e-07,
|
| 16235 |
+
"loss": 2.5318,
|
| 16236 |
+
"step": 56350
|
| 16237 |
+
},
|
| 16238 |
+
{
|
| 16239 |
+
"epoch": 0.9874067327565068,
|
| 16240 |
+
"grad_norm": 0.2021484375,
|
| 16241 |
+
"learning_rate": 4.347288433530494e-07,
|
| 16242 |
+
"loss": 2.5302,
|
| 16243 |
+
"step": 56375
|
| 16244 |
+
},
|
| 16245 |
+
{
|
| 16246 |
+
"epoch": 0.9878446071391039,
|
| 16247 |
+
"grad_norm": 0.20703125,
|
| 16248 |
+
"learning_rate": 4.050674698248824e-07,
|
| 16249 |
+
"loss": 2.5494,
|
| 16250 |
+
"step": 56400
|
| 16251 |
+
},
|
| 16252 |
+
{
|
| 16253 |
+
"epoch": 0.9882824815217011,
|
| 16254 |
+
"grad_norm": 0.2138671875,
|
| 16255 |
+
"learning_rate": 3.764536453380041e-07,
|
| 16256 |
+
"loss": 2.5285,
|
| 16257 |
+
"step": 56425
|
| 16258 |
+
},
|
| 16259 |
+
{
|
| 16260 |
+
"epoch": 0.9887203559042982,
|
| 16261 |
+
"grad_norm": 0.2158203125,
|
| 16262 |
+
"learning_rate": 3.4888742988964383e-07,
|
| 16263 |
+
"loss": 2.5454,
|
| 16264 |
+
"step": 56450
|
| 16265 |
+
},
|
| 16266 |
+
{
|
| 16267 |
+
"epoch": 0.9891582302868953,
|
| 16268 |
+
"grad_norm": 0.2177734375,
|
| 16269 |
+
"learning_rate": 3.2236888128067687e-07,
|
| 16270 |
+
"loss": 2.5468,
|
| 16271 |
+
"step": 56475
|
| 16272 |
+
},
|
| 16273 |
+
{
|
| 16274 |
+
"epoch": 0.9895961046694924,
|
| 16275 |
+
"grad_norm": 0.21484375,
|
| 16276 |
+
"learning_rate": 2.968980551150136e-07,
|
| 16277 |
+
"loss": 2.5422,
|
| 16278 |
+
"step": 56500
|
| 16279 |
+
},
|
| 16280 |
+
{
|
| 16281 |
+
"epoch": 0.9900339790520896,
|
| 16282 |
+
"grad_norm": 0.21875,
|
| 16283 |
+
"learning_rate": 2.7247500479982145e-07,
|
| 16284 |
+
"loss": 2.5383,
|
| 16285 |
+
"step": 56525
|
| 16286 |
+
},
|
| 16287 |
+
{
|
| 16288 |
+
"epoch": 0.9904718534346867,
|
| 16289 |
+
"grad_norm": 0.208984375,
|
| 16290 |
+
"learning_rate": 2.490997815453033e-07,
|
| 16291 |
+
"loss": 2.5277,
|
| 16292 |
+
"step": 56550
|
| 16293 |
+
},
|
| 16294 |
+
{
|
| 16295 |
+
"epoch": 0.9909097278172838,
|
| 16296 |
+
"grad_norm": 0.2197265625,
|
| 16297 |
+
"learning_rate": 2.2677243436453056e-07,
|
| 16298 |
+
"loss": 2.5163,
|
| 16299 |
+
"step": 56575
|
| 16300 |
+
},
|
| 16301 |
+
{
|
| 16302 |
+
"epoch": 0.9913476021998809,
|
| 16303 |
+
"grad_norm": 0.220703125,
|
| 16304 |
+
"learning_rate": 2.054930100734431e-07,
|
| 16305 |
+
"loss": 2.5343,
|
| 16306 |
+
"step": 56600
|
| 16307 |
+
},
|
| 16308 |
+
{
|
| 16309 |
+
"epoch": 0.9917854765824781,
|
| 16310 |
+
"grad_norm": 0.212890625,
|
| 16311 |
+
"learning_rate": 1.8526155329057214e-07,
|
| 16312 |
+
"loss": 2.5355,
|
| 16313 |
+
"step": 56625
|
| 16314 |
+
},
|
| 16315 |
+
{
|
| 16316 |
+
"epoch": 0.9922233509650752,
|
| 16317 |
+
"grad_norm": 0.2138671875,
|
| 16318 |
+
"learning_rate": 1.6607810643731737e-07,
|
| 16319 |
+
"loss": 2.5432,
|
| 16320 |
+
"step": 56650
|
| 16321 |
+
},
|
| 16322 |
+
{
|
| 16323 |
+
"epoch": 0.9926612253476722,
|
| 16324 |
+
"grad_norm": 0.216796875,
|
| 16325 |
+
"learning_rate": 1.47942709737392e-07,
|
| 16326 |
+
"loss": 2.5337,
|
| 16327 |
+
"step": 56675
|
| 16328 |
+
},
|
| 16329 |
+
{
|
| 16330 |
+
"epoch": 0.9930990997302693,
|
| 16331 |
+
"grad_norm": 0.2119140625,
|
| 16332 |
+
"learning_rate": 1.3085540121698937e-07,
|
| 16333 |
+
"loss": 2.5476,
|
| 16334 |
+
"step": 56700
|
| 16335 |
+
},
|
| 16336 |
+
{
|
| 16337 |
+
"epoch": 0.9935369741128665,
|
| 16338 |
+
"grad_norm": 0.224609375,
|
| 16339 |
+
"learning_rate": 1.1481621670478282e-07,
|
| 16340 |
+
"loss": 2.5355,
|
| 16341 |
+
"step": 56725
|
| 16342 |
+
},
|
| 16343 |
+
{
|
| 16344 |
+
"epoch": 0.9939748484954636,
|
| 16345 |
+
"grad_norm": 0.2109375,
|
| 16346 |
+
"learning_rate": 9.982518983170375e-08,
|
| 16347 |
+
"loss": 2.5383,
|
| 16348 |
+
"step": 56750
|
| 16349 |
+
},
|
| 16350 |
+
{
|
| 16351 |
+
"epoch": 0.9944127228780607,
|
| 16352 |
+
"grad_norm": 0.2080078125,
|
| 16353 |
+
"learning_rate": 8.58823520308305e-08,
|
| 16354 |
+
"loss": 2.542,
|
| 16355 |
+
"step": 56775
|
| 16356 |
+
},
|
| 16357 |
+
{
|
| 16358 |
+
"epoch": 0.9948505972606578,
|
| 16359 |
+
"grad_norm": 0.220703125,
|
| 16360 |
+
"learning_rate": 7.298773253749946e-08,
|
| 16361 |
+
"loss": 2.5432,
|
| 16362 |
+
"step": 56800
|
| 16363 |
+
},
|
| 16364 |
+
{
|
| 16365 |
+
"epoch": 0.995288471643255,
|
| 16366 |
+
"grad_norm": 0.212890625,
|
| 16367 |
+
"learning_rate": 6.114135838908296e-08,
|
| 16368 |
+
"loss": 2.5368,
|
| 16369 |
+
"step": 56825
|
| 16370 |
+
},
|
| 16371 |
+
{
|
| 16372 |
+
"epoch": 0.9957263460258521,
|
| 16373 |
+
"grad_norm": 0.212890625,
|
| 16374 |
+
"learning_rate": 5.0343254425044837e-08,
|
| 16375 |
+
"loss": 2.5428,
|
| 16376 |
+
"step": 56850
|
| 16377 |
+
},
|
| 16378 |
+
{
|
| 16379 |
+
"epoch": 0.9961642204084492,
|
| 16380 |
+
"grad_norm": 0.216796875,
|
| 16381 |
+
"learning_rate": 4.0593443286773834e-08,
|
| 16382 |
+
"loss": 2.5417,
|
| 16383 |
+
"step": 56875
|
| 16384 |
+
},
|
| 16385 |
+
{
|
| 16386 |
+
"epoch": 0.9966020947910463,
|
| 16387 |
+
"grad_norm": 0.2109375,
|
| 16388 |
+
"learning_rate": 3.189194541769469e-08,
|
| 16389 |
+
"loss": 2.5482,
|
| 16390 |
+
"step": 56900
|
| 16391 |
+
},
|
| 16392 |
+
{
|
| 16393 |
+
"epoch": 0.9970399691736435,
|
| 16394 |
+
"grad_norm": 0.21484375,
|
| 16395 |
+
"learning_rate": 2.4238779063046057e-08,
|
| 16396 |
+
"loss": 2.5316,
|
| 16397 |
+
"step": 56925
|
| 16398 |
+
},
|
| 16399 |
+
{
|
| 16400 |
+
"epoch": 0.9974778435562406,
|
| 16401 |
+
"grad_norm": 0.2138671875,
|
| 16402 |
+
"learning_rate": 1.7633960269991535e-08,
|
| 16403 |
+
"loss": 2.5438,
|
| 16404 |
+
"step": 56950
|
| 16405 |
+
},
|
| 16406 |
+
{
|
| 16407 |
+
"epoch": 0.9979157179388377,
|
| 16408 |
+
"grad_norm": 0.2119140625,
|
| 16409 |
+
"learning_rate": 1.2077502887453129e-08,
|
| 16410 |
+
"loss": 2.538,
|
| 16411 |
+
"step": 56975
|
| 16412 |
+
},
|
| 16413 |
+
{
|
| 16414 |
+
"epoch": 0.9983535923214348,
|
| 16415 |
+
"grad_norm": 0.2109375,
|
| 16416 |
+
"learning_rate": 7.569418566222286e-09,
|
| 16417 |
+
"loss": 2.5309,
|
| 16418 |
+
"step": 57000
|
| 16419 |
+
},
|
| 16420 |
+
{
|
| 16421 |
+
"epoch": 0.9983535923214348,
|
| 16422 |
+
"eval_loss": 2.5533201694488525,
|
| 16423 |
+
"eval_runtime": 36.4092,
|
| 16424 |
+
"eval_samples_per_second": 100.387,
|
| 16425 |
+
"eval_steps_per_second": 3.159,
|
| 16426 |
+
"step": 57000
|
| 16427 |
}
|
| 16428 |
],
|
| 16429 |
"logging_steps": 25,
|
|
|
|
| 16443 |
"attributes": {}
|
| 16444 |
}
|
| 16445 |
},
|
| 16446 |
+
"total_flos": 2.8910359451038384e+19,
|
| 16447 |
"train_batch_size": 16,
|
| 16448 |
"trial_name": null,
|
| 16449 |
"trial_params": null
|