Training in progress, step 870000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e4a2010561ae6b219703766f77123488046a173556bad32795a979a0714f6e7c
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f70f0274e15c89ba1e5e2f894493b1d0b23475cd923b06c04110b0afc32880fa
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:55186c11fcb4a70528459949af80ac1b2eb9ca8f4193cbb9f48d92b908839fa5
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:55186c11fcb4a70528459949af80ac1b2eb9ca8f4193cbb9f48d92b908839fa5
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:55186c11fcb4a70528459949af80ac1b2eb9ca8f4193cbb9f48d92b908839fa5
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:55186c11fcb4a70528459949af80ac1b2eb9ca8f4193cbb9f48d92b908839fa5
|
| 3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:55186c11fcb4a70528459949af80ac1b2eb9ca8f4193cbb9f48d92b908839fa5
|
| 3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:55186c11fcb4a70528459949af80ac1b2eb9ca8f4193cbb9f48d92b908839fa5
|
| 3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:55186c11fcb4a70528459949af80ac1b2eb9ca8f4193cbb9f48d92b908839fa5
|
| 3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:55186c11fcb4a70528459949af80ac1b2eb9ca8f4193cbb9f48d92b908839fa5
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d368f4f79d735aeb82977d11fd8d84913a3919ff8ecbae0982e3d606c331447e
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 9.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -17206,11 +17206,211 @@
|
|
| 17206 |
"eval_samples_per_second": 862.261,
|
| 17207 |
"eval_steps_per_second": 13.514,
|
| 17208 |
"step": 860000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17209 |
}
|
| 17210 |
],
|
| 17211 |
"max_steps": 1000000,
|
| 17212 |
"num_train_epochs": 12,
|
| 17213 |
-
"total_flos": 6.
|
| 17214 |
"trial_name": null,
|
| 17215 |
"trial_params": null
|
| 17216 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 9.345861726687708,
|
| 5 |
+
"global_step": 870000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 17206 |
"eval_samples_per_second": 862.261,
|
| 17207 |
"eval_steps_per_second": 13.514,
|
| 17208 |
"step": 860000
|
| 17209 |
+
},
|
| 17210 |
+
{
|
| 17211 |
+
"epoch": 9.24,
|
| 17212 |
+
"learning_rate": 1.7317340758783407e-05,
|
| 17213 |
+
"loss": 0.1835,
|
| 17214 |
+
"step": 860500
|
| 17215 |
+
},
|
| 17216 |
+
{
|
| 17217 |
+
"epoch": 9.25,
|
| 17218 |
+
"learning_rate": 1.726590597701708e-05,
|
| 17219 |
+
"loss": 0.1842,
|
| 17220 |
+
"step": 861000
|
| 17221 |
+
},
|
| 17222 |
+
{
|
| 17223 |
+
"epoch": 9.25,
|
| 17224 |
+
"eval_loss": 0.17551767826080322,
|
| 17225 |
+
"eval_runtime": 2.6099,
|
| 17226 |
+
"eval_samples_per_second": 880.108,
|
| 17227 |
+
"eval_steps_per_second": 13.794,
|
| 17228 |
+
"step": 861000
|
| 17229 |
+
},
|
| 17230 |
+
{
|
| 17231 |
+
"epoch": 9.25,
|
| 17232 |
+
"learning_rate": 1.7214642707868325e-05,
|
| 17233 |
+
"loss": 0.1839,
|
| 17234 |
+
"step": 861500
|
| 17235 |
+
},
|
| 17236 |
+
{
|
| 17237 |
+
"epoch": 9.26,
|
| 17238 |
+
"learning_rate": 1.7163551091488952e-05,
|
| 17239 |
+
"loss": 0.1839,
|
| 17240 |
+
"step": 862000
|
| 17241 |
+
},
|
| 17242 |
+
{
|
| 17243 |
+
"epoch": 9.26,
|
| 17244 |
+
"eval_loss": 0.17372268438339233,
|
| 17245 |
+
"eval_runtime": 2.641,
|
| 17246 |
+
"eval_samples_per_second": 869.76,
|
| 17247 |
+
"eval_steps_per_second": 13.631,
|
| 17248 |
+
"step": 862000
|
| 17249 |
+
},
|
| 17250 |
+
{
|
| 17251 |
+
"epoch": 9.26,
|
| 17252 |
+
"learning_rate": 1.711263126756148e-05,
|
| 17253 |
+
"loss": 0.1841,
|
| 17254 |
+
"step": 862500
|
| 17255 |
+
},
|
| 17256 |
+
{
|
| 17257 |
+
"epoch": 9.27,
|
| 17258 |
+
"learning_rate": 1.7061883375298788e-05,
|
| 17259 |
+
"loss": 0.1834,
|
| 17260 |
+
"step": 863000
|
| 17261 |
+
},
|
| 17262 |
+
{
|
| 17263 |
+
"epoch": 9.27,
|
| 17264 |
+
"eval_loss": 0.17352163791656494,
|
| 17265 |
+
"eval_runtime": 2.6082,
|
| 17266 |
+
"eval_samples_per_second": 880.7,
|
| 17267 |
+
"eval_steps_per_second": 13.803,
|
| 17268 |
+
"step": 863000
|
| 17269 |
+
},
|
| 17270 |
+
{
|
| 17271 |
+
"epoch": 9.27,
|
| 17272 |
+
"learning_rate": 1.7011307553443647e-05,
|
| 17273 |
+
"loss": 0.1837,
|
| 17274 |
+
"step": 863500
|
| 17275 |
+
},
|
| 17276 |
+
{
|
| 17277 |
+
"epoch": 9.28,
|
| 17278 |
+
"learning_rate": 1.6960903940268456e-05,
|
| 17279 |
+
"loss": 0.1836,
|
| 17280 |
+
"step": 864000
|
| 17281 |
+
},
|
| 17282 |
+
{
|
| 17283 |
+
"epoch": 9.28,
|
| 17284 |
+
"eval_loss": 0.17270448803901672,
|
| 17285 |
+
"eval_runtime": 2.6998,
|
| 17286 |
+
"eval_samples_per_second": 850.79,
|
| 17287 |
+
"eval_steps_per_second": 13.334,
|
| 17288 |
+
"step": 864000
|
| 17289 |
+
},
|
| 17290 |
+
{
|
| 17291 |
+
"epoch": 9.28,
|
| 17292 |
+
"learning_rate": 1.6910672673574746e-05,
|
| 17293 |
+
"loss": 0.1838,
|
| 17294 |
+
"step": 864500
|
| 17295 |
+
},
|
| 17296 |
+
{
|
| 17297 |
+
"epoch": 9.29,
|
| 17298 |
+
"learning_rate": 1.6860613890692876e-05,
|
| 17299 |
+
"loss": 0.1832,
|
| 17300 |
+
"step": 865000
|
| 17301 |
+
},
|
| 17302 |
+
{
|
| 17303 |
+
"epoch": 9.29,
|
| 17304 |
+
"eval_loss": 0.1736259162425995,
|
| 17305 |
+
"eval_runtime": 2.674,
|
| 17306 |
+
"eval_samples_per_second": 859.019,
|
| 17307 |
+
"eval_steps_per_second": 13.463,
|
| 17308 |
+
"step": 865000
|
| 17309 |
+
},
|
| 17310 |
+
{
|
| 17311 |
+
"epoch": 9.3,
|
| 17312 |
+
"learning_rate": 1.6810727728481673e-05,
|
| 17313 |
+
"loss": 0.1833,
|
| 17314 |
+
"step": 865500
|
| 17315 |
+
},
|
| 17316 |
+
{
|
| 17317 |
+
"epoch": 9.3,
|
| 17318 |
+
"learning_rate": 1.6761014323327962e-05,
|
| 17319 |
+
"loss": 0.1835,
|
| 17320 |
+
"step": 866000
|
| 17321 |
+
},
|
| 17322 |
+
{
|
| 17323 |
+
"epoch": 9.3,
|
| 17324 |
+
"eval_loss": 0.17491458356380463,
|
| 17325 |
+
"eval_runtime": 2.6701,
|
| 17326 |
+
"eval_samples_per_second": 860.268,
|
| 17327 |
+
"eval_steps_per_second": 13.483,
|
| 17328 |
+
"step": 866000
|
| 17329 |
+
},
|
| 17330 |
+
{
|
| 17331 |
+
"epoch": 9.31,
|
| 17332 |
+
"learning_rate": 1.6711473811146333e-05,
|
| 17333 |
+
"loss": 0.1836,
|
| 17334 |
+
"step": 866500
|
| 17335 |
+
},
|
| 17336 |
+
{
|
| 17337 |
+
"epoch": 9.31,
|
| 17338 |
+
"learning_rate": 1.6662106327378645e-05,
|
| 17339 |
+
"loss": 0.1837,
|
| 17340 |
+
"step": 867000
|
| 17341 |
+
},
|
| 17342 |
+
{
|
| 17343 |
+
"epoch": 9.31,
|
| 17344 |
+
"eval_loss": 0.17311297357082367,
|
| 17345 |
+
"eval_runtime": 2.5749,
|
| 17346 |
+
"eval_samples_per_second": 892.083,
|
| 17347 |
+
"eval_steps_per_second": 13.981,
|
| 17348 |
+
"step": 867000
|
| 17349 |
+
},
|
| 17350 |
+
{
|
| 17351 |
+
"epoch": 9.32,
|
| 17352 |
+
"learning_rate": 1.6612912006993688e-05,
|
| 17353 |
+
"loss": 0.1835,
|
| 17354 |
+
"step": 867500
|
| 17355 |
+
},
|
| 17356 |
+
{
|
| 17357 |
+
"epoch": 9.32,
|
| 17358 |
+
"learning_rate": 1.6563890984486884e-05,
|
| 17359 |
+
"loss": 0.1834,
|
| 17360 |
+
"step": 868000
|
| 17361 |
+
},
|
| 17362 |
+
{
|
| 17363 |
+
"epoch": 9.32,
|
| 17364 |
+
"eval_loss": 0.1740087866783142,
|
| 17365 |
+
"eval_runtime": 2.6738,
|
| 17366 |
+
"eval_samples_per_second": 859.085,
|
| 17367 |
+
"eval_steps_per_second": 13.464,
|
| 17368 |
+
"step": 868000
|
| 17369 |
+
},
|
| 17370 |
+
{
|
| 17371 |
+
"epoch": 9.33,
|
| 17372 |
+
"learning_rate": 1.6515043393879825e-05,
|
| 17373 |
+
"loss": 0.1837,
|
| 17374 |
+
"step": 868500
|
| 17375 |
+
},
|
| 17376 |
+
{
|
| 17377 |
+
"epoch": 9.33,
|
| 17378 |
+
"learning_rate": 1.6466369368719955e-05,
|
| 17379 |
+
"loss": 0.1834,
|
| 17380 |
+
"step": 869000
|
| 17381 |
+
},
|
| 17382 |
+
{
|
| 17383 |
+
"epoch": 9.33,
|
| 17384 |
+
"eval_loss": 0.17570127546787262,
|
| 17385 |
+
"eval_runtime": 2.6358,
|
| 17386 |
+
"eval_samples_per_second": 871.447,
|
| 17387 |
+
"eval_steps_per_second": 13.658,
|
| 17388 |
+
"step": 869000
|
| 17389 |
+
},
|
| 17390 |
+
{
|
| 17391 |
+
"epoch": 9.34,
|
| 17392 |
+
"learning_rate": 1.641786904208022e-05,
|
| 17393 |
+
"loss": 0.1833,
|
| 17394 |
+
"step": 869500
|
| 17395 |
+
},
|
| 17396 |
+
{
|
| 17397 |
+
"epoch": 9.35,
|
| 17398 |
+
"learning_rate": 1.6369542546558626e-05,
|
| 17399 |
+
"loss": 0.1835,
|
| 17400 |
+
"step": 870000
|
| 17401 |
+
},
|
| 17402 |
+
{
|
| 17403 |
+
"epoch": 9.35,
|
| 17404 |
+
"eval_loss": 0.17456747591495514,
|
| 17405 |
+
"eval_runtime": 2.7306,
|
| 17406 |
+
"eval_samples_per_second": 841.196,
|
| 17407 |
+
"eval_steps_per_second": 13.184,
|
| 17408 |
+
"step": 870000
|
| 17409 |
}
|
| 17410 |
],
|
| 17411 |
"max_steps": 1000000,
|
| 17412 |
"num_train_epochs": 12,
|
| 17413 |
+
"total_flos": 6.098673421612561e+22,
|
| 17414 |
"trial_name": null,
|
| 17415 |
"trial_params": null
|
| 17416 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f70f0274e15c89ba1e5e2f894493b1d0b23475cd923b06c04110b0afc32880fa
|
| 3 |
size 449471589
|