Training in progress, step 25000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +103 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893438545
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f0d203a8c7bacd9049d0a9a6ba66771bad7db3da1b0e849bcf26a2083ccab635
|
| 3 |
size 893438545
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:83bcb89c6daff571003c7df9aae49dc4a313a7404638fcb7d95f82b950d2e5af
|
| 3 |
size 449471589
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15523
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3271ed48861b853ff2a93ab2d113124282a36f76af112eacd53eeaa11994564a
|
| 3 |
size 15523
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 559
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:865ba51ccd1f35f320c7110ccb893ffd337376d4652e722731792c01668ba190
|
| 3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3041a56ac9f847b3d8ba49ecd2e74fdd80acf3c5d07444653f5e498839336c44
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch":
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -406,11 +406,111 @@
|
|
| 406 |
"eval_samples_per_second": 32.434,
|
| 407 |
"eval_steps_per_second": 1.038,
|
| 408 |
"step": 20000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 409 |
}
|
| 410 |
],
|
| 411 |
"max_steps": 1000000,
|
| 412 |
"num_train_epochs": 86,
|
| 413 |
-
"total_flos":
|
| 414 |
"trial_name": null,
|
| 415 |
"trial_params": null
|
| 416 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 2.1265736645117386,
|
| 5 |
+
"global_step": 25000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 406 |
"eval_samples_per_second": 32.434,
|
| 407 |
"eval_steps_per_second": 1.038,
|
| 408 |
"step": 20000
|
| 409 |
+
},
|
| 410 |
+
{
|
| 411 |
+
"epoch": 1.74,
|
| 412 |
+
"learning_rate": 9.999999999999999e-06,
|
| 413 |
+
"loss": 0.4313,
|
| 414 |
+
"step": 20500
|
| 415 |
+
},
|
| 416 |
+
{
|
| 417 |
+
"epoch": 1.79,
|
| 418 |
+
"learning_rate": 9.999999999999999e-06,
|
| 419 |
+
"loss": 0.4296,
|
| 420 |
+
"step": 21000
|
| 421 |
+
},
|
| 422 |
+
{
|
| 423 |
+
"epoch": 1.79,
|
| 424 |
+
"eval_loss": 0.39878711104393005,
|
| 425 |
+
"eval_runtime": 16.1844,
|
| 426 |
+
"eval_samples_per_second": 30.894,
|
| 427 |
+
"eval_steps_per_second": 0.989,
|
| 428 |
+
"step": 21000
|
| 429 |
+
},
|
| 430 |
+
{
|
| 431 |
+
"epoch": 1.83,
|
| 432 |
+
"learning_rate": 9.999999999999999e-06,
|
| 433 |
+
"loss": 0.4288,
|
| 434 |
+
"step": 21500
|
| 435 |
+
},
|
| 436 |
+
{
|
| 437 |
+
"epoch": 1.87,
|
| 438 |
+
"learning_rate": 9.999999999999999e-06,
|
| 439 |
+
"loss": 0.4278,
|
| 440 |
+
"step": 22000
|
| 441 |
+
},
|
| 442 |
+
{
|
| 443 |
+
"epoch": 1.87,
|
| 444 |
+
"eval_loss": 0.3984658718109131,
|
| 445 |
+
"eval_runtime": 17.0912,
|
| 446 |
+
"eval_samples_per_second": 29.255,
|
| 447 |
+
"eval_steps_per_second": 0.936,
|
| 448 |
+
"step": 22000
|
| 449 |
+
},
|
| 450 |
+
{
|
| 451 |
+
"epoch": 1.91,
|
| 452 |
+
"learning_rate": 9.999999999999999e-06,
|
| 453 |
+
"loss": 0.4278,
|
| 454 |
+
"step": 22500
|
| 455 |
+
},
|
| 456 |
+
{
|
| 457 |
+
"epoch": 1.96,
|
| 458 |
+
"learning_rate": 9.999999999999999e-06,
|
| 459 |
+
"loss": 0.4276,
|
| 460 |
+
"step": 23000
|
| 461 |
+
},
|
| 462 |
+
{
|
| 463 |
+
"epoch": 1.96,
|
| 464 |
+
"eval_loss": 0.3981262743473053,
|
| 465 |
+
"eval_runtime": 16.5906,
|
| 466 |
+
"eval_samples_per_second": 30.138,
|
| 467 |
+
"eval_steps_per_second": 0.964,
|
| 468 |
+
"step": 23000
|
| 469 |
+
},
|
| 470 |
+
{
|
| 471 |
+
"epoch": 2.0,
|
| 472 |
+
"learning_rate": 9.999999999999999e-06,
|
| 473 |
+
"loss": 0.428,
|
| 474 |
+
"step": 23500
|
| 475 |
+
},
|
| 476 |
+
{
|
| 477 |
+
"epoch": 2.04,
|
| 478 |
+
"learning_rate": 9.999999999999999e-06,
|
| 479 |
+
"loss": 0.4264,
|
| 480 |
+
"step": 24000
|
| 481 |
+
},
|
| 482 |
+
{
|
| 483 |
+
"epoch": 2.04,
|
| 484 |
+
"eval_loss": 0.39774054288864136,
|
| 485 |
+
"eval_runtime": 24.4452,
|
| 486 |
+
"eval_samples_per_second": 20.454,
|
| 487 |
+
"eval_steps_per_second": 0.655,
|
| 488 |
+
"step": 24000
|
| 489 |
+
},
|
| 490 |
+
{
|
| 491 |
+
"epoch": 2.08,
|
| 492 |
+
"learning_rate": 9.999999999999999e-06,
|
| 493 |
+
"loss": 0.427,
|
| 494 |
+
"step": 24500
|
| 495 |
+
},
|
| 496 |
+
{
|
| 497 |
+
"epoch": 2.13,
|
| 498 |
+
"learning_rate": 9.999999999999999e-06,
|
| 499 |
+
"loss": 0.4267,
|
| 500 |
+
"step": 25000
|
| 501 |
+
},
|
| 502 |
+
{
|
| 503 |
+
"epoch": 2.13,
|
| 504 |
+
"eval_loss": 0.3962687849998474,
|
| 505 |
+
"eval_runtime": 16.5048,
|
| 506 |
+
"eval_samples_per_second": 30.294,
|
| 507 |
+
"eval_steps_per_second": 0.969,
|
| 508 |
+
"step": 25000
|
| 509 |
}
|
| 510 |
],
|
| 511 |
"max_steps": 1000000,
|
| 512 |
"num_train_epochs": 86,
|
| 513 |
+
"total_flos": 1.1500501101744764e+21,
|
| 514 |
"trial_name": null,
|
| 515 |
"trial_params": null
|
| 516 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:83bcb89c6daff571003c7df9aae49dc4a313a7404638fcb7d95f82b950d2e5af
|
| 3 |
size 449471589
|