Training in progress, step 470000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 202194449
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5de66d4a93d929dbf59bf082b441005c55284067d059fcb692905debf93d43ec
|
| 3 |
size 202194449
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 102501541
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b866106aa5570064c241431938c823c9b3d9c0359a68aaf12dac64797d2c681f
|
| 3 |
size 102501541
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0939dfbb6ece3c207cb8da7c45398fb3890f9eac2a2a8a7c463fa15eb93581f8
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0939dfbb6ece3c207cb8da7c45398fb3890f9eac2a2a8a7c463fa15eb93581f8
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0939dfbb6ece3c207cb8da7c45398fb3890f9eac2a2a8a7c463fa15eb93581f8
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0939dfbb6ece3c207cb8da7c45398fb3890f9eac2a2a8a7c463fa15eb93581f8
|
| 3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0939dfbb6ece3c207cb8da7c45398fb3890f9eac2a2a8a7c463fa15eb93581f8
|
| 3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0939dfbb6ece3c207cb8da7c45398fb3890f9eac2a2a8a7c463fa15eb93581f8
|
| 3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0939dfbb6ece3c207cb8da7c45398fb3890f9eac2a2a8a7c463fa15eb93581f8
|
| 3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0939dfbb6ece3c207cb8da7c45398fb3890f9eac2a2a8a7c463fa15eb93581f8
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8d15e11a6de6abd55121a73bc214cc950fb971f927ae2b1d5067145da50de5d0
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 10.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -9206,11 +9206,211 @@
|
|
| 9206 |
"eval_samples_per_second": 1173.539,
|
| 9207 |
"eval_steps_per_second": 18.392,
|
| 9208 |
"step": 460000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9209 |
}
|
| 9210 |
],
|
| 9211 |
"max_steps": 500000,
|
| 9212 |
"num_train_epochs": 12,
|
| 9213 |
-
"total_flos": 1.
|
| 9214 |
"trial_name": null,
|
| 9215 |
"trial_params": null
|
| 9216 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 10.483583154889365,
|
| 5 |
+
"global_step": 470000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 9206 |
"eval_samples_per_second": 1173.539,
|
| 9207 |
"eval_steps_per_second": 18.392,
|
| 9208 |
"step": 460000
|
| 9209 |
+
},
|
| 9210 |
+
{
|
| 9211 |
+
"epoch": 10.27,
|
| 9212 |
+
"learning_rate": 1.4920092237448903e-05,
|
| 9213 |
+
"loss": 0.2554,
|
| 9214 |
+
"step": 460500
|
| 9215 |
+
},
|
| 9216 |
+
{
|
| 9217 |
+
"epoch": 10.28,
|
| 9218 |
+
"learning_rate": 1.4797008356541874e-05,
|
| 9219 |
+
"loss": 0.2548,
|
| 9220 |
+
"step": 461000
|
| 9221 |
+
},
|
| 9222 |
+
{
|
| 9223 |
+
"epoch": 10.28,
|
| 9224 |
+
"eval_loss": 0.2381051927804947,
|
| 9225 |
+
"eval_runtime": 1.9705,
|
| 9226 |
+
"eval_samples_per_second": 1165.71,
|
| 9227 |
+
"eval_steps_per_second": 18.27,
|
| 9228 |
+
"step": 461000
|
| 9229 |
+
},
|
| 9230 |
+
{
|
| 9231 |
+
"epoch": 10.29,
|
| 9232 |
+
"learning_rate": 1.4675457713088947e-05,
|
| 9233 |
+
"loss": 0.2547,
|
| 9234 |
+
"step": 461500
|
| 9235 |
+
},
|
| 9236 |
+
{
|
| 9237 |
+
"epoch": 10.31,
|
| 9238 |
+
"learning_rate": 1.4555441636348494e-05,
|
| 9239 |
+
"loss": 0.2549,
|
| 9240 |
+
"step": 462000
|
| 9241 |
+
},
|
| 9242 |
+
{
|
| 9243 |
+
"epoch": 10.31,
|
| 9244 |
+
"eval_loss": 0.23718567192554474,
|
| 9245 |
+
"eval_runtime": 2.0107,
|
| 9246 |
+
"eval_samples_per_second": 1142.393,
|
| 9247 |
+
"eval_steps_per_second": 17.904,
|
| 9248 |
+
"step": 462000
|
| 9249 |
+
},
|
| 9250 |
+
{
|
| 9251 |
+
"epoch": 10.32,
|
| 9252 |
+
"learning_rate": 1.4436961438797095e-05,
|
| 9253 |
+
"loss": 0.2547,
|
| 9254 |
+
"step": 462500
|
| 9255 |
+
},
|
| 9256 |
+
{
|
| 9257 |
+
"epoch": 10.33,
|
| 9258 |
+
"learning_rate": 1.4320018416115206e-05,
|
| 9259 |
+
"loss": 0.2548,
|
| 9260 |
+
"step": 463000
|
| 9261 |
+
},
|
| 9262 |
+
{
|
| 9263 |
+
"epoch": 10.33,
|
| 9264 |
+
"eval_loss": 0.23715750873088837,
|
| 9265 |
+
"eval_runtime": 2.0434,
|
| 9266 |
+
"eval_samples_per_second": 1124.132,
|
| 9267 |
+
"eval_steps_per_second": 17.618,
|
| 9268 |
+
"step": 463000
|
| 9269 |
+
},
|
| 9270 |
+
{
|
| 9271 |
+
"epoch": 10.34,
|
| 9272 |
+
"learning_rate": 1.4204613847173003e-05,
|
| 9273 |
+
"loss": 0.2547,
|
| 9274 |
+
"step": 463500
|
| 9275 |
+
},
|
| 9276 |
+
{
|
| 9277 |
+
"epoch": 10.35,
|
| 9278 |
+
"learning_rate": 1.4090748994016354e-05,
|
| 9279 |
+
"loss": 0.2547,
|
| 9280 |
+
"step": 464000
|
| 9281 |
+
},
|
| 9282 |
+
{
|
| 9283 |
+
"epoch": 10.35,
|
| 9284 |
+
"eval_loss": 0.23930229246616364,
|
| 9285 |
+
"eval_runtime": 2.02,
|
| 9286 |
+
"eval_samples_per_second": 1137.119,
|
| 9287 |
+
"eval_steps_per_second": 17.822,
|
| 9288 |
+
"step": 464000
|
| 9289 |
+
},
|
| 9290 |
+
{
|
| 9291 |
+
"epoch": 10.36,
|
| 9292 |
+
"learning_rate": 1.3978425101853049e-05,
|
| 9293 |
+
"loss": 0.2545,
|
| 9294 |
+
"step": 464500
|
| 9295 |
+
},
|
| 9296 |
+
{
|
| 9297 |
+
"epoch": 10.37,
|
| 9298 |
+
"learning_rate": 1.3867643399039165e-05,
|
| 9299 |
+
"loss": 0.2546,
|
| 9300 |
+
"step": 465000
|
| 9301 |
+
},
|
| 9302 |
+
{
|
| 9303 |
+
"epoch": 10.37,
|
| 9304 |
+
"eval_loss": 0.24023665487766266,
|
| 9305 |
+
"eval_runtime": 2.0161,
|
| 9306 |
+
"eval_samples_per_second": 1139.319,
|
| 9307 |
+
"eval_steps_per_second": 17.856,
|
| 9308 |
+
"step": 465000
|
| 9309 |
+
},
|
| 9310 |
+
{
|
| 9311 |
+
"epoch": 10.38,
|
| 9312 |
+
"learning_rate": 1.3758405097065648e-05,
|
| 9313 |
+
"loss": 0.2547,
|
| 9314 |
+
"step": 465500
|
| 9315 |
+
},
|
| 9316 |
+
{
|
| 9317 |
+
"epoch": 10.39,
|
| 9318 |
+
"learning_rate": 1.3650711390545131e-05,
|
| 9319 |
+
"loss": 0.2549,
|
| 9320 |
+
"step": 466000
|
| 9321 |
+
},
|
| 9322 |
+
{
|
| 9323 |
+
"epoch": 10.39,
|
| 9324 |
+
"eval_loss": 0.2383406162261963,
|
| 9325 |
+
"eval_runtime": 2.0372,
|
| 9326 |
+
"eval_samples_per_second": 1127.524,
|
| 9327 |
+
"eval_steps_per_second": 17.671,
|
| 9328 |
+
"step": 466000
|
| 9329 |
+
},
|
| 9330 |
+
{
|
| 9331 |
+
"epoch": 10.41,
|
| 9332 |
+
"learning_rate": 1.3544563457198657e-05,
|
| 9333 |
+
"loss": 0.2546,
|
| 9334 |
+
"step": 466500
|
| 9335 |
+
},
|
| 9336 |
+
{
|
| 9337 |
+
"epoch": 10.42,
|
| 9338 |
+
"learning_rate": 1.343996245784307e-05,
|
| 9339 |
+
"loss": 0.2545,
|
| 9340 |
+
"step": 467000
|
| 9341 |
+
},
|
| 9342 |
+
{
|
| 9343 |
+
"epoch": 10.42,
|
| 9344 |
+
"eval_loss": 0.23841743171215057,
|
| 9345 |
+
"eval_runtime": 1.9888,
|
| 9346 |
+
"eval_samples_per_second": 1154.948,
|
| 9347 |
+
"eval_steps_per_second": 18.101,
|
| 9348 |
+
"step": 467000
|
| 9349 |
+
},
|
| 9350 |
+
{
|
| 9351 |
+
"epoch": 10.43,
|
| 9352 |
+
"learning_rate": 1.3336909536378107e-05,
|
| 9353 |
+
"loss": 0.2549,
|
| 9354 |
+
"step": 467500
|
| 9355 |
+
},
|
| 9356 |
+
{
|
| 9357 |
+
"epoch": 10.44,
|
| 9358 |
+
"learning_rate": 1.3235405819774022e-05,
|
| 9359 |
+
"loss": 0.2544,
|
| 9360 |
+
"step": 468000
|
| 9361 |
+
},
|
| 9362 |
+
{
|
| 9363 |
+
"epoch": 10.44,
|
| 9364 |
+
"eval_loss": 0.2374790459871292,
|
| 9365 |
+
"eval_runtime": 2.0476,
|
| 9366 |
+
"eval_samples_per_second": 1121.805,
|
| 9367 |
+
"eval_steps_per_second": 17.582,
|
| 9368 |
+
"step": 468000
|
| 9369 |
+
},
|
| 9370 |
+
{
|
| 9371 |
+
"epoch": 10.45,
|
| 9372 |
+
"learning_rate": 1.3135452418059208e-05,
|
| 9373 |
+
"loss": 0.2543,
|
| 9374 |
+
"step": 468500
|
| 9375 |
+
},
|
| 9376 |
+
{
|
| 9377 |
+
"epoch": 10.46,
|
| 9378 |
+
"learning_rate": 1.3037050424308027e-05,
|
| 9379 |
+
"loss": 0.2544,
|
| 9380 |
+
"step": 469000
|
| 9381 |
+
},
|
| 9382 |
+
{
|
| 9383 |
+
"epoch": 10.46,
|
| 9384 |
+
"eval_loss": 0.2366662174463272,
|
| 9385 |
+
"eval_runtime": 2.0397,
|
| 9386 |
+
"eval_samples_per_second": 1126.142,
|
| 9387 |
+
"eval_steps_per_second": 17.65,
|
| 9388 |
+
"step": 469000
|
| 9389 |
+
},
|
| 9390 |
+
{
|
| 9391 |
+
"epoch": 10.47,
|
| 9392 |
+
"learning_rate": 1.2940200914628945e-05,
|
| 9393 |
+
"loss": 0.2547,
|
| 9394 |
+
"step": 469500
|
| 9395 |
+
},
|
| 9396 |
+
{
|
| 9397 |
+
"epoch": 10.48,
|
| 9398 |
+
"learning_rate": 1.2844904948152644e-05,
|
| 9399 |
+
"loss": 0.255,
|
| 9400 |
+
"step": 470000
|
| 9401 |
+
},
|
| 9402 |
+
{
|
| 9403 |
+
"epoch": 10.48,
|
| 9404 |
+
"eval_loss": 0.23585031926631927,
|
| 9405 |
+
"eval_runtime": 1.91,
|
| 9406 |
+
"eval_samples_per_second": 1202.649,
|
| 9407 |
+
"eval_steps_per_second": 18.849,
|
| 9408 |
+
"step": 470000
|
| 9409 |
}
|
| 9410 |
],
|
| 9411 |
"max_steps": 500000,
|
| 9412 |
"num_train_epochs": 12,
|
| 9413 |
+
"total_flos": 1.5015610979787347e+22,
|
| 9414 |
"trial_name": null,
|
| 9415 |
"trial_params": null
|
| 9416 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 102501541
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b866106aa5570064c241431938c823c9b3d9c0359a68aaf12dac64797d2c681f
|
| 3 |
size 102501541
|