Training in progress, step 5000, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 36730224
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:190f0428e79c75fea98ae401d8541afc938c837a01a15c4bff27851efd1dd5ff
|
| 3 |
size 36730224
|
last-checkpoint/ar_diffusion_info.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1544
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0bc6fe79754f0206b6654ffb1ff34bd91c57fda8a689c690f85d10e6dc833bb2
|
| 3 |
size 1544
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 73588346
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c76afc96f497ed26d04fa930deb390dd280c8858f2593b0b831d0095f3a4f7e6
|
| 3 |
size 73588346
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a263bfee7c334e6fc36b6c4750e79a70eeb042f841889c6eb0ea501e4fa6ac73
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4f5e01d0dbe696c7f72fcba4635ddf54e7178bc606b00e2f2f7ba88d1189fa69
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0c41e45efc3657786e1c23c51c4aebb48328108f6732539bee7a77cf7b55107d
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"best_global_step":
|
| 3 |
-
"best_metric": 4.
|
| 4 |
-
"best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 250,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -782,6 +782,92 @@
|
|
| 782 |
"eval_samples_per_second": 53.931,
|
| 783 |
"eval_steps_per_second": 13.483,
|
| 784 |
"step": 4500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 785 |
}
|
| 786 |
],
|
| 787 |
"logging_steps": 50,
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_global_step": 5000,
|
| 3 |
+
"best_metric": 4.465761661529541,
|
| 4 |
+
"best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-5000",
|
| 5 |
+
"epoch": 0.3845858010922237,
|
| 6 |
"eval_steps": 250,
|
| 7 |
+
"global_step": 5000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 782 |
"eval_samples_per_second": 53.931,
|
| 783 |
"eval_steps_per_second": 13.483,
|
| 784 |
"step": 4500
|
| 785 |
+
},
|
| 786 |
+
{
|
| 787 |
+
"epoch": 0.34997307899392355,
|
| 788 |
+
"grad_norm": 6.404330730438232,
|
| 789 |
+
"learning_rate": 0.0001790042334363556,
|
| 790 |
+
"loss": 4.5673,
|
| 791 |
+
"step": 4550
|
| 792 |
+
},
|
| 793 |
+
{
|
| 794 |
+
"epoch": 0.3538189370048458,
|
| 795 |
+
"grad_norm": 10.212136268615723,
|
| 796 |
+
"learning_rate": 0.00017874451341453912,
|
| 797 |
+
"loss": 4.6249,
|
| 798 |
+
"step": 4600
|
| 799 |
+
},
|
| 800 |
+
{
|
| 801 |
+
"epoch": 0.35766479501576803,
|
| 802 |
+
"grad_norm": 4.401816368103027,
|
| 803 |
+
"learning_rate": 0.00017848479339272266,
|
| 804 |
+
"loss": 4.6305,
|
| 805 |
+
"step": 4650
|
| 806 |
+
},
|
| 807 |
+
{
|
| 808 |
+
"epoch": 0.3615106530266903,
|
| 809 |
+
"grad_norm": 4.710996150970459,
|
| 810 |
+
"learning_rate": 0.00017822507337090617,
|
| 811 |
+
"loss": 4.3731,
|
| 812 |
+
"step": 4700
|
| 813 |
+
},
|
| 814 |
+
{
|
| 815 |
+
"epoch": 0.36535651103761246,
|
| 816 |
+
"grad_norm": 3.150613307952881,
|
| 817 |
+
"learning_rate": 0.0001779653533490897,
|
| 818 |
+
"loss": 4.4491,
|
| 819 |
+
"step": 4750
|
| 820 |
+
},
|
| 821 |
+
{
|
| 822 |
+
"epoch": 0.36535651103761246,
|
| 823 |
+
"eval_loss": 4.564510345458984,
|
| 824 |
+
"eval_runtime": 18.5575,
|
| 825 |
+
"eval_samples_per_second": 53.886,
|
| 826 |
+
"eval_steps_per_second": 13.472,
|
| 827 |
+
"step": 4750
|
| 828 |
+
},
|
| 829 |
+
{
|
| 830 |
+
"epoch": 0.3692023690485347,
|
| 831 |
+
"grad_norm": 4.828207492828369,
|
| 832 |
+
"learning_rate": 0.0001777056333272732,
|
| 833 |
+
"loss": 4.5923,
|
| 834 |
+
"step": 4800
|
| 835 |
+
},
|
| 836 |
+
{
|
| 837 |
+
"epoch": 0.37304822705945695,
|
| 838 |
+
"grad_norm": 3.780848264694214,
|
| 839 |
+
"learning_rate": 0.0001774459133054567,
|
| 840 |
+
"loss": 4.5544,
|
| 841 |
+
"step": 4850
|
| 842 |
+
},
|
| 843 |
+
{
|
| 844 |
+
"epoch": 0.3768940850703792,
|
| 845 |
+
"grad_norm": 4.04913854598999,
|
| 846 |
+
"learning_rate": 0.00017718619328364023,
|
| 847 |
+
"loss": 4.5271,
|
| 848 |
+
"step": 4900
|
| 849 |
+
},
|
| 850 |
+
{
|
| 851 |
+
"epoch": 0.38073994308130144,
|
| 852 |
+
"grad_norm": 4.097137451171875,
|
| 853 |
+
"learning_rate": 0.00017692647326182377,
|
| 854 |
+
"loss": 4.4929,
|
| 855 |
+
"step": 4950
|
| 856 |
+
},
|
| 857 |
+
{
|
| 858 |
+
"epoch": 0.3845858010922237,
|
| 859 |
+
"grad_norm": 4.65788459777832,
|
| 860 |
+
"learning_rate": 0.00017666675324000728,
|
| 861 |
+
"loss": 4.5888,
|
| 862 |
+
"step": 5000
|
| 863 |
+
},
|
| 864 |
+
{
|
| 865 |
+
"epoch": 0.3845858010922237,
|
| 866 |
+
"eval_loss": 4.465761661529541,
|
| 867 |
+
"eval_runtime": 18.6518,
|
| 868 |
+
"eval_samples_per_second": 53.614,
|
| 869 |
+
"eval_steps_per_second": 13.404,
|
| 870 |
+
"step": 5000
|
| 871 |
}
|
| 872 |
],
|
| 873 |
"logging_steps": 50,
|