Training in progress, step 920000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ee3aeed50a2a4c25efaca58a22f8e74eeb4a5176131dc454fbde37631acf016d
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:49c53e22d617d625d68b0b7c24d68f147254236ebb3b272bd10eecf6d93598e9
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b532accc0aeba393068677a9f95a8872109f8bff87e999a983cbe001ebba335b
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b532accc0aeba393068677a9f95a8872109f8bff87e999a983cbe001ebba335b
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b532accc0aeba393068677a9f95a8872109f8bff87e999a983cbe001ebba335b
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b532accc0aeba393068677a9f95a8872109f8bff87e999a983cbe001ebba335b
|
| 3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b532accc0aeba393068677a9f95a8872109f8bff87e999a983cbe001ebba335b
|
| 3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b532accc0aeba393068677a9f95a8872109f8bff87e999a983cbe001ebba335b
|
| 3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b532accc0aeba393068677a9f95a8872109f8bff87e999a983cbe001ebba335b
|
| 3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b532accc0aeba393068677a9f95a8872109f8bff87e999a983cbe001ebba335b
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7df508c344c1f04d3b388cef9605593fbfd129cd18e2830701d3110873541479
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 9.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -18206,11 +18206,211 @@
|
|
| 18206 |
"eval_samples_per_second": 861.024,
|
| 18207 |
"eval_steps_per_second": 13.495,
|
| 18208 |
"step": 910000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18209 |
}
|
| 18210 |
],
|
| 18211 |
"max_steps": 1000000,
|
| 18212 |
"num_train_epochs": 12,
|
| 18213 |
-
"total_flos": 6.
|
| 18214 |
"trial_name": null,
|
| 18215 |
"trial_params": null
|
| 18216 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 9.903505347802326,
|
| 5 |
+
"global_step": 920000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 18206 |
"eval_samples_per_second": 861.024,
|
| 18207 |
"eval_steps_per_second": 13.495,
|
| 18208 |
"step": 910000
|
| 18209 |
+
},
|
| 18210 |
+
{
|
| 18211 |
+
"epoch": 9.8,
|
| 18212 |
+
"learning_rate": 1.3043645183837645e-05,
|
| 18213 |
+
"loss": 0.1817,
|
| 18214 |
+
"step": 910500
|
| 18215 |
+
},
|
| 18216 |
+
{
|
| 18217 |
+
"epoch": 9.8,
|
| 18218 |
+
"learning_rate": 1.3009978131197669e-05,
|
| 18219 |
+
"loss": 0.1821,
|
| 18220 |
+
"step": 911000
|
| 18221 |
+
},
|
| 18222 |
+
{
|
| 18223 |
+
"epoch": 9.8,
|
| 18224 |
+
"eval_loss": 0.17097479104995728,
|
| 18225 |
+
"eval_runtime": 2.7046,
|
| 18226 |
+
"eval_samples_per_second": 849.303,
|
| 18227 |
+
"eval_steps_per_second": 13.311,
|
| 18228 |
+
"step": 911000
|
| 18229 |
+
},
|
| 18230 |
+
{
|
| 18231 |
+
"epoch": 9.81,
|
| 18232 |
+
"learning_rate": 1.297649422671947e-05,
|
| 18233 |
+
"loss": 0.1815,
|
| 18234 |
+
"step": 911500
|
| 18235 |
+
},
|
| 18236 |
+
{
|
| 18237 |
+
"epoch": 9.81,
|
| 18238 |
+
"learning_rate": 1.2943193561946762e-05,
|
| 18239 |
+
"loss": 0.1814,
|
| 18240 |
+
"step": 912000
|
| 18241 |
+
},
|
| 18242 |
+
{
|
| 18243 |
+
"epoch": 9.81,
|
| 18244 |
+
"eval_loss": 0.1718176305294037,
|
| 18245 |
+
"eval_runtime": 2.6645,
|
| 18246 |
+
"eval_samples_per_second": 862.075,
|
| 18247 |
+
"eval_steps_per_second": 13.511,
|
| 18248 |
+
"step": 912000
|
| 18249 |
+
},
|
| 18250 |
+
{
|
| 18251 |
+
"epoch": 9.82,
|
| 18252 |
+
"learning_rate": 1.291007622792231e-05,
|
| 18253 |
+
"loss": 0.1816,
|
| 18254 |
+
"step": 912500
|
| 18255 |
+
},
|
| 18256 |
+
{
|
| 18257 |
+
"epoch": 9.83,
|
| 18258 |
+
"learning_rate": 1.2877142315187628e-05,
|
| 18259 |
+
"loss": 0.1818,
|
| 18260 |
+
"step": 913000
|
| 18261 |
+
},
|
| 18262 |
+
{
|
| 18263 |
+
"epoch": 9.83,
|
| 18264 |
+
"eval_loss": 0.1710846871137619,
|
| 18265 |
+
"eval_runtime": 2.7026,
|
| 18266 |
+
"eval_samples_per_second": 849.924,
|
| 18267 |
+
"eval_steps_per_second": 13.321,
|
| 18268 |
+
"step": 913000
|
| 18269 |
+
},
|
| 18270 |
+
{
|
| 18271 |
+
"epoch": 9.83,
|
| 18272 |
+
"learning_rate": 1.2844391913782773e-05,
|
| 18273 |
+
"loss": 0.182,
|
| 18274 |
+
"step": 913500
|
| 18275 |
+
},
|
| 18276 |
+
{
|
| 18277 |
+
"epoch": 9.84,
|
| 18278 |
+
"learning_rate": 1.28118251132461e-05,
|
| 18279 |
+
"loss": 0.1815,
|
| 18280 |
+
"step": 914000
|
| 18281 |
+
},
|
| 18282 |
+
{
|
| 18283 |
+
"epoch": 9.84,
|
| 18284 |
+
"eval_loss": 0.17333008348941803,
|
| 18285 |
+
"eval_runtime": 2.5502,
|
| 18286 |
+
"eval_samples_per_second": 900.72,
|
| 18287 |
+
"eval_steps_per_second": 14.117,
|
| 18288 |
+
"step": 914000
|
| 18289 |
+
},
|
| 18290 |
+
{
|
| 18291 |
+
"epoch": 9.84,
|
| 18292 |
+
"learning_rate": 1.2779442002613984e-05,
|
| 18293 |
+
"loss": 0.1814,
|
| 18294 |
+
"step": 914500
|
| 18295 |
+
},
|
| 18296 |
+
{
|
| 18297 |
+
"epoch": 9.85,
|
| 18298 |
+
"learning_rate": 1.274724267042063e-05,
|
| 18299 |
+
"loss": 0.1814,
|
| 18300 |
+
"step": 915000
|
| 18301 |
+
},
|
| 18302 |
+
{
|
| 18303 |
+
"epoch": 9.85,
|
| 18304 |
+
"eval_loss": 0.1717572808265686,
|
| 18305 |
+
"eval_runtime": 2.636,
|
| 18306 |
+
"eval_samples_per_second": 871.395,
|
| 18307 |
+
"eval_steps_per_second": 13.657,
|
| 18308 |
+
"step": 915000
|
| 18309 |
+
},
|
| 18310 |
+
{
|
| 18311 |
+
"epoch": 9.85,
|
| 18312 |
+
"learning_rate": 1.2715227204697775e-05,
|
| 18313 |
+
"loss": 0.1814,
|
| 18314 |
+
"step": 915500
|
| 18315 |
+
},
|
| 18316 |
+
{
|
| 18317 |
+
"epoch": 9.86,
|
| 18318 |
+
"learning_rate": 1.2683395692974472e-05,
|
| 18319 |
+
"loss": 0.1819,
|
| 18320 |
+
"step": 916000
|
| 18321 |
+
},
|
| 18322 |
+
{
|
| 18323 |
+
"epoch": 9.86,
|
| 18324 |
+
"eval_loss": 0.17225094139575958,
|
| 18325 |
+
"eval_runtime": 2.6373,
|
| 18326 |
+
"eval_samples_per_second": 870.95,
|
| 18327 |
+
"eval_steps_per_second": 13.65,
|
| 18328 |
+
"step": 916000
|
| 18329 |
+
},
|
| 18330 |
+
{
|
| 18331 |
+
"epoch": 9.86,
|
| 18332 |
+
"learning_rate": 1.2651748222276879e-05,
|
| 18333 |
+
"loss": 0.1815,
|
| 18334 |
+
"step": 916500
|
| 18335 |
+
},
|
| 18336 |
+
{
|
| 18337 |
+
"epoch": 9.87,
|
| 18338 |
+
"learning_rate": 1.2620284879127947e-05,
|
| 18339 |
+
"loss": 0.1816,
|
| 18340 |
+
"step": 917000
|
| 18341 |
+
},
|
| 18342 |
+
{
|
| 18343 |
+
"epoch": 9.87,
|
| 18344 |
+
"eval_loss": 0.17146611213684082,
|
| 18345 |
+
"eval_runtime": 2.7493,
|
| 18346 |
+
"eval_samples_per_second": 835.499,
|
| 18347 |
+
"eval_steps_per_second": 13.094,
|
| 18348 |
+
"step": 917000
|
| 18349 |
+
},
|
| 18350 |
+
{
|
| 18351 |
+
"epoch": 9.88,
|
| 18352 |
+
"learning_rate": 1.2589005749547281e-05,
|
| 18353 |
+
"loss": 0.1815,
|
| 18354 |
+
"step": 917500
|
| 18355 |
+
},
|
| 18356 |
+
{
|
| 18357 |
+
"epoch": 9.88,
|
| 18358 |
+
"learning_rate": 1.2557910919050803e-05,
|
| 18359 |
+
"loss": 0.1813,
|
| 18360 |
+
"step": 918000
|
| 18361 |
+
},
|
| 18362 |
+
{
|
| 18363 |
+
"epoch": 9.88,
|
| 18364 |
+
"eval_loss": 0.16947948932647705,
|
| 18365 |
+
"eval_runtime": 2.7503,
|
| 18366 |
+
"eval_samples_per_second": 835.189,
|
| 18367 |
+
"eval_steps_per_second": 13.09,
|
| 18368 |
+
"step": 918000
|
| 18369 |
+
},
|
| 18370 |
+
{
|
| 18371 |
+
"epoch": 9.89,
|
| 18372 |
+
"learning_rate": 1.2527000472650597e-05,
|
| 18373 |
+
"loss": 0.1815,
|
| 18374 |
+
"step": 918500
|
| 18375 |
+
},
|
| 18376 |
+
{
|
| 18377 |
+
"epoch": 9.89,
|
| 18378 |
+
"learning_rate": 1.2496274494854666e-05,
|
| 18379 |
+
"loss": 0.1812,
|
| 18380 |
+
"step": 919000
|
| 18381 |
+
},
|
| 18382 |
+
{
|
| 18383 |
+
"epoch": 9.89,
|
| 18384 |
+
"eval_loss": 0.17127934098243713,
|
| 18385 |
+
"eval_runtime": 2.6734,
|
| 18386 |
+
"eval_samples_per_second": 859.205,
|
| 18387 |
+
"eval_steps_per_second": 13.466,
|
| 18388 |
+
"step": 919000
|
| 18389 |
+
},
|
| 18390 |
+
{
|
| 18391 |
+
"epoch": 9.9,
|
| 18392 |
+
"learning_rate": 1.2465733069666629e-05,
|
| 18393 |
+
"loss": 0.1813,
|
| 18394 |
+
"step": 919500
|
| 18395 |
+
},
|
| 18396 |
+
{
|
| 18397 |
+
"epoch": 9.9,
|
| 18398 |
+
"learning_rate": 1.24353762805856e-05,
|
| 18399 |
+
"loss": 0.1814,
|
| 18400 |
+
"step": 920000
|
| 18401 |
+
},
|
| 18402 |
+
{
|
| 18403 |
+
"epoch": 9.9,
|
| 18404 |
+
"eval_loss": 0.17229017615318298,
|
| 18405 |
+
"eval_runtime": 2.6708,
|
| 18406 |
+
"eval_samples_per_second": 860.053,
|
| 18407 |
+
"eval_steps_per_second": 13.479,
|
| 18408 |
+
"step": 920000
|
| 18409 |
}
|
| 18410 |
],
|
| 18411 |
"max_steps": 1000000,
|
| 18412 |
"num_train_epochs": 12,
|
| 18413 |
+
"total_flos": 6.449174726764297e+22,
|
| 18414 |
"trial_name": null,
|
| 18415 |
"trial_params": null
|
| 18416 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:49c53e22d617d625d68b0b7c24d68f147254236ebb3b272bd10eecf6d93598e9
|
| 3 |
size 449471589
|