Training in progress, step 28000, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 36730224
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c2b2a09b0c718d7a491a19203c596e4ee868ab1c484877d294e6c4faf6f30efb
|
| 3 |
size 36730224
|
last-checkpoint/ar_diffusion_info.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1544
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:84d5f993c46a18f90bdec40ac57cbb61c0044ca158da830a2a251cb1b9f73c26
|
| 3 |
size 1544
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 73588346
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f145355c23ee5fb538578cd9954c12d1f33dfb98ccfc124985339d68356ee942
|
| 3 |
size 73588346
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a374ffed3d3d8a7f9634cdb9b7207d347f6957d04522010a4d50d73e4c623442
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:13f83252b2ae8a53cd4f408d431cb4cec02e9d65b9dadef7fdff56180c3d5cd3
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:969919a5c5fc35784da996c8ca0a4cb297b59fb4c9aadbd1a769e70ab51007e7
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"best_global_step":
|
| 3 |
-
"best_metric": 1.
|
| 4 |
-
"best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-
|
| 5 |
-
"epoch": 2.
|
| 6 |
"eval_steps": 250,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -4738,6 +4738,92 @@
|
|
| 4738 |
"eval_samples_per_second": 56.49,
|
| 4739 |
"eval_steps_per_second": 14.123,
|
| 4740 |
"step": 27500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4741 |
}
|
| 4742 |
],
|
| 4743 |
"logging_steps": 50,
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_global_step": 28000,
|
| 3 |
+
"best_metric": 1.008616328239441,
|
| 4 |
+
"best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-28000",
|
| 5 |
+
"epoch": 2.1536804861164525,
|
| 6 |
"eval_steps": 250,
|
| 7 |
+
"global_step": 28000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 4738 |
"eval_samples_per_second": 56.49,
|
| 4739 |
"eval_steps_per_second": 14.123,
|
| 4740 |
"step": 27500
|
| 4741 |
+
},
|
| 4742 |
+
{
|
| 4743 |
+
"epoch": 2.1190677640181526,
|
| 4744 |
+
"grad_norm": 0.9638277292251587,
|
| 4745 |
+
"learning_rate": 5.9564189803391944e-05,
|
| 4746 |
+
"loss": 0.9753,
|
| 4747 |
+
"step": 27550
|
| 4748 |
+
},
|
| 4749 |
+
{
|
| 4750 |
+
"epoch": 2.1229136220290745,
|
| 4751 |
+
"grad_norm": 1.1322181224822998,
|
| 4752 |
+
"learning_rate": 5.9304469781575464e-05,
|
| 4753 |
+
"loss": 0.948,
|
| 4754 |
+
"step": 27600
|
| 4755 |
+
},
|
| 4756 |
+
{
|
| 4757 |
+
"epoch": 2.126759480039997,
|
| 4758 |
+
"grad_norm": 1.144047737121582,
|
| 4759 |
+
"learning_rate": 5.904474975975898e-05,
|
| 4760 |
+
"loss": 1.009,
|
| 4761 |
+
"step": 27650
|
| 4762 |
+
},
|
| 4763 |
+
{
|
| 4764 |
+
"epoch": 2.1306053380509193,
|
| 4765 |
+
"grad_norm": 2.713625431060791,
|
| 4766 |
+
"learning_rate": 5.8785029737942496e-05,
|
| 4767 |
+
"loss": 1.0069,
|
| 4768 |
+
"step": 27700
|
| 4769 |
+
},
|
| 4770 |
+
{
|
| 4771 |
+
"epoch": 2.1344511960618413,
|
| 4772 |
+
"grad_norm": 1.1035822629928589,
|
| 4773 |
+
"learning_rate": 5.852530971612602e-05,
|
| 4774 |
+
"loss": 1.0013,
|
| 4775 |
+
"step": 27750
|
| 4776 |
+
},
|
| 4777 |
+
{
|
| 4778 |
+
"epoch": 2.1344511960618413,
|
| 4779 |
+
"eval_loss": 1.0144418478012085,
|
| 4780 |
+
"eval_runtime": 17.6881,
|
| 4781 |
+
"eval_samples_per_second": 56.535,
|
| 4782 |
+
"eval_steps_per_second": 14.134,
|
| 4783 |
+
"step": 27750
|
| 4784 |
+
},
|
| 4785 |
+
{
|
| 4786 |
+
"epoch": 2.1382970540727637,
|
| 4787 |
+
"grad_norm": 1.2658100128173828,
|
| 4788 |
+
"learning_rate": 5.826558969430954e-05,
|
| 4789 |
+
"loss": 1.0185,
|
| 4790 |
+
"step": 27800
|
| 4791 |
+
},
|
| 4792 |
+
{
|
| 4793 |
+
"epoch": 2.1421429120836857,
|
| 4794 |
+
"grad_norm": 0.9421238303184509,
|
| 4795 |
+
"learning_rate": 5.800586967249305e-05,
|
| 4796 |
+
"loss": 0.9892,
|
| 4797 |
+
"step": 27850
|
| 4798 |
+
},
|
| 4799 |
+
{
|
| 4800 |
+
"epoch": 2.145988770094608,
|
| 4801 |
+
"grad_norm": 0.9409565925598145,
|
| 4802 |
+
"learning_rate": 5.7746149650676575e-05,
|
| 4803 |
+
"loss": 0.9985,
|
| 4804 |
+
"step": 27900
|
| 4805 |
+
},
|
| 4806 |
+
{
|
| 4807 |
+
"epoch": 2.1498346281055305,
|
| 4808 |
+
"grad_norm": 1.445890188217163,
|
| 4809 |
+
"learning_rate": 5.7486429628860094e-05,
|
| 4810 |
+
"loss": 1.0147,
|
| 4811 |
+
"step": 27950
|
| 4812 |
+
},
|
| 4813 |
+
{
|
| 4814 |
+
"epoch": 2.1536804861164525,
|
| 4815 |
+
"grad_norm": 1.109020709991455,
|
| 4816 |
+
"learning_rate": 5.722670960704361e-05,
|
| 4817 |
+
"loss": 1.0093,
|
| 4818 |
+
"step": 28000
|
| 4819 |
+
},
|
| 4820 |
+
{
|
| 4821 |
+
"epoch": 2.1536804861164525,
|
| 4822 |
+
"eval_loss": 1.008616328239441,
|
| 4823 |
+
"eval_runtime": 17.6489,
|
| 4824 |
+
"eval_samples_per_second": 56.661,
|
| 4825 |
+
"eval_steps_per_second": 14.165,
|
| 4826 |
+
"step": 28000
|
| 4827 |
}
|
| 4828 |
],
|
| 4829 |
"logging_steps": 50,
|