Training in progress, step 5000, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 36730224
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a0c1e03b37e306af29004a56bbc2e3cfe78ac4558d064ded4954032563a506c0
|
| 3 |
size 36730224
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 73588346
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0d76b1ed26fb441fb296b2bc10e92b9546db047308ea5940e5ff2ef749dcfb42
|
| 3 |
size 73588346
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a263bfee7c334e6fc36b6c4750e79a70eeb042f841889c6eb0ea501e4fa6ac73
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1bbcc820499d7de99bfade4a29f6e9516d1e9cb1c8de79befcf1f0d7f2a4da15
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7200a3e7cdec9309704535acc69d439ad04e127a63c3c13c5ca0a94f0236ea31
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"best_global_step":
|
| 3 |
-
"best_metric": 4.
|
| 4 |
-
"best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 250,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -782,6 +782,92 @@
|
|
| 782 |
"eval_samples_per_second": 53.143,
|
| 783 |
"eval_steps_per_second": 13.286,
|
| 784 |
"step": 4500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 785 |
}
|
| 786 |
],
|
| 787 |
"logging_steps": 50,
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_global_step": 5000,
|
| 3 |
+
"best_metric": 4.414160251617432,
|
| 4 |
+
"best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-5000",
|
| 5 |
+
"epoch": 0.3845858010922237,
|
| 6 |
"eval_steps": 250,
|
| 7 |
+
"global_step": 5000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 782 |
"eval_samples_per_second": 53.143,
|
| 783 |
"eval_steps_per_second": 13.286,
|
| 784 |
"step": 4500
|
| 785 |
+
},
|
| 786 |
+
{
|
| 787 |
+
"epoch": 0.34997307899392355,
|
| 788 |
+
"grad_norm": 4.825377941131592,
|
| 789 |
+
"learning_rate": 0.00011712731149590087,
|
| 790 |
+
"loss": 4.5321,
|
| 791 |
+
"step": 4550
|
| 792 |
+
},
|
| 793 |
+
{
|
| 794 |
+
"epoch": 0.3538189370048458,
|
| 795 |
+
"grad_norm": 3.5786240100860596,
|
| 796 |
+
"learning_rate": 0.00011686996801588176,
|
| 797 |
+
"loss": 4.5819,
|
| 798 |
+
"step": 4600
|
| 799 |
+
},
|
| 800 |
+
{
|
| 801 |
+
"epoch": 0.35766479501576803,
|
| 802 |
+
"grad_norm": 4.445742130279541,
|
| 803 |
+
"learning_rate": 0.00011661262453586264,
|
| 804 |
+
"loss": 4.5954,
|
| 805 |
+
"step": 4650
|
| 806 |
+
},
|
| 807 |
+
{
|
| 808 |
+
"epoch": 0.3615106530266903,
|
| 809 |
+
"grad_norm": 4.670301914215088,
|
| 810 |
+
"learning_rate": 0.00011635528105584354,
|
| 811 |
+
"loss": 4.3381,
|
| 812 |
+
"step": 4700
|
| 813 |
+
},
|
| 814 |
+
{
|
| 815 |
+
"epoch": 0.36535651103761246,
|
| 816 |
+
"grad_norm": 3.0563037395477295,
|
| 817 |
+
"learning_rate": 0.0001160979375758244,
|
| 818 |
+
"loss": 4.4451,
|
| 819 |
+
"step": 4750
|
| 820 |
+
},
|
| 821 |
+
{
|
| 822 |
+
"epoch": 0.36535651103761246,
|
| 823 |
+
"eval_loss": 4.503940582275391,
|
| 824 |
+
"eval_runtime": 19.0274,
|
| 825 |
+
"eval_samples_per_second": 52.556,
|
| 826 |
+
"eval_steps_per_second": 13.139,
|
| 827 |
+
"step": 4750
|
| 828 |
+
},
|
| 829 |
+
{
|
| 830 |
+
"epoch": 0.3692023690485347,
|
| 831 |
+
"grad_norm": 4.921920299530029,
|
| 832 |
+
"learning_rate": 0.00011584059409580528,
|
| 833 |
+
"loss": 4.5505,
|
| 834 |
+
"step": 4800
|
| 835 |
+
},
|
| 836 |
+
{
|
| 837 |
+
"epoch": 0.37304822705945695,
|
| 838 |
+
"grad_norm": 4.440188407897949,
|
| 839 |
+
"learning_rate": 0.00011558325061578617,
|
| 840 |
+
"loss": 4.5339,
|
| 841 |
+
"step": 4850
|
| 842 |
+
},
|
| 843 |
+
{
|
| 844 |
+
"epoch": 0.3768940850703792,
|
| 845 |
+
"grad_norm": 4.123379707336426,
|
| 846 |
+
"learning_rate": 0.00011532590713576705,
|
| 847 |
+
"loss": 4.5001,
|
| 848 |
+
"step": 4900
|
| 849 |
+
},
|
| 850 |
+
{
|
| 851 |
+
"epoch": 0.38073994308130144,
|
| 852 |
+
"grad_norm": 3.6461265087127686,
|
| 853 |
+
"learning_rate": 0.00011506856365574795,
|
| 854 |
+
"loss": 4.4704,
|
| 855 |
+
"step": 4950
|
| 856 |
+
},
|
| 857 |
+
{
|
| 858 |
+
"epoch": 0.3845858010922237,
|
| 859 |
+
"grad_norm": 4.586422443389893,
|
| 860 |
+
"learning_rate": 0.00011481122017572883,
|
| 861 |
+
"loss": 4.5607,
|
| 862 |
+
"step": 5000
|
| 863 |
+
},
|
| 864 |
+
{
|
| 865 |
+
"epoch": 0.3845858010922237,
|
| 866 |
+
"eval_loss": 4.414160251617432,
|
| 867 |
+
"eval_runtime": 18.6554,
|
| 868 |
+
"eval_samples_per_second": 53.604,
|
| 869 |
+
"eval_steps_per_second": 13.401,
|
| 870 |
+
"step": 5000
|
| 871 |
}
|
| 872 |
],
|
| 873 |
"logging_steps": 50,
|