Training in progress, step 360000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +2 -2
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +2 -2
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:44471d6e6546be5ac1a0d86dea95ba4d44ec44baa5148bbd72a7ea895ad69cfc
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9d3b9e249ae21e66394d1e5adda08ca3c78e35cfc386e28fe333440be7a14450
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8cd407f01a45e91c6a9d73f9ffcc5948c50f62ac1349333301934ceecd28bde2
|
| 3 |
+
size 14439
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:10ca9bf76c7cf63afb390947106325fd549859bdbd17156e672be09fdd4b8f4d
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:458dbb378f2ef2f1049b9621261d1e352171f603268c570c20cc0831e3c801af
|
| 3 |
+
size 14439
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ed71a40157ab7c8a370261156f0220243926bfa7450a89c6374ca93f070e4120
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a42c32ab3c49e09d799093ca137ee6e22777a2749e499367cd831d70ce83fb58
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 5.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -2596,11 +2596,85 @@
|
|
| 2596 |
"eval_samples_per_second": 971.32,
|
| 2597 |
"eval_steps_per_second": 15.541,
|
| 2598 |
"step": 350000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2599 |
}
|
| 2600 |
],
|
| 2601 |
"max_steps": 1000000,
|
| 2602 |
"num_train_epochs": 16,
|
| 2603 |
-
"total_flos": 2.
|
| 2604 |
"trial_name": null,
|
| 2605 |
"trial_params": null
|
| 2606 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 5.497274268175363,
|
| 5 |
+
"global_step": 360000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 2596 |
"eval_samples_per_second": 971.32,
|
| 2597 |
"eval_steps_per_second": 15.541,
|
| 2598 |
"step": 350000
|
| 2599 |
+
},
|
| 2600 |
+
{
|
| 2601 |
+
"epoch": 5.36,
|
| 2602 |
+
"learning_rate": 0.00011809236994438816,
|
| 2603 |
+
"loss": 0.2831,
|
| 2604 |
+
"step": 351000
|
| 2605 |
+
},
|
| 2606 |
+
{
|
| 2607 |
+
"epoch": 5.38,
|
| 2608 |
+
"learning_rate": 0.00011789795224815164,
|
| 2609 |
+
"loss": 0.2827,
|
| 2610 |
+
"step": 352000
|
| 2611 |
+
},
|
| 2612 |
+
{
|
| 2613 |
+
"epoch": 5.39,
|
| 2614 |
+
"learning_rate": 0.00011770312010598116,
|
| 2615 |
+
"loss": 0.282,
|
| 2616 |
+
"step": 353000
|
| 2617 |
+
},
|
| 2618 |
+
{
|
| 2619 |
+
"epoch": 5.41,
|
| 2620 |
+
"learning_rate": 0.00011750787564852973,
|
| 2621 |
+
"loss": 0.2822,
|
| 2622 |
+
"step": 354000
|
| 2623 |
+
},
|
| 2624 |
+
{
|
| 2625 |
+
"epoch": 5.42,
|
| 2626 |
+
"learning_rate": 0.00011731222101095955,
|
| 2627 |
+
"loss": 0.2825,
|
| 2628 |
+
"step": 355000
|
| 2629 |
+
},
|
| 2630 |
+
{
|
| 2631 |
+
"epoch": 5.42,
|
| 2632 |
+
"eval_runtime": 1.0697,
|
| 2633 |
+
"eval_samples_per_second": 934.885,
|
| 2634 |
+
"eval_steps_per_second": 14.958,
|
| 2635 |
+
"step": 355000
|
| 2636 |
+
},
|
| 2637 |
+
{
|
| 2638 |
+
"epoch": 5.44,
|
| 2639 |
+
"learning_rate": 0.00011711615833291833,
|
| 2640 |
+
"loss": 0.2822,
|
| 2641 |
+
"step": 356000
|
| 2642 |
+
},
|
| 2643 |
+
{
|
| 2644 |
+
"epoch": 5.45,
|
| 2645 |
+
"learning_rate": 0.0001169196897585161,
|
| 2646 |
+
"loss": 0.2824,
|
| 2647 |
+
"step": 357000
|
| 2648 |
+
},
|
| 2649 |
+
{
|
| 2650 |
+
"epoch": 5.47,
|
| 2651 |
+
"learning_rate": 0.00011672281743630175,
|
| 2652 |
+
"loss": 0.2818,
|
| 2653 |
+
"step": 358000
|
| 2654 |
+
},
|
| 2655 |
+
{
|
| 2656 |
+
"epoch": 5.48,
|
| 2657 |
+
"learning_rate": 0.0001165255435192394,
|
| 2658 |
+
"loss": 0.2815,
|
| 2659 |
+
"step": 359000
|
| 2660 |
+
},
|
| 2661 |
+
{
|
| 2662 |
+
"epoch": 5.5,
|
| 2663 |
+
"learning_rate": 0.00011632787016468506,
|
| 2664 |
+
"loss": 0.2819,
|
| 2665 |
+
"step": 360000
|
| 2666 |
+
},
|
| 2667 |
+
{
|
| 2668 |
+
"epoch": 5.5,
|
| 2669 |
+
"eval_runtime": 1.1008,
|
| 2670 |
+
"eval_samples_per_second": 908.433,
|
| 2671 |
+
"eval_steps_per_second": 14.535,
|
| 2672 |
+
"step": 360000
|
| 2673 |
}
|
| 2674 |
],
|
| 2675 |
"max_steps": 1000000,
|
| 2676 |
"num_train_epochs": 16,
|
| 2677 |
+
"total_flos": 2.5236061117517534e+22,
|
| 2678 |
"trial_name": null,
|
| 2679 |
"trial_params": null
|
| 2680 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9d3b9e249ae21e66394d1e5adda08ca3c78e35cfc386e28fe333440be7a14450
|
| 3 |
size 449471589
|