Training in progress, step 28000, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 132187888
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9780097950800686c9322dd13a3faf02ad28fadad548e324be714511b2714201
|
| 3 |
size 132187888
|
last-checkpoint/ar_diffusion_info.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1800
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f0eeadcb7061b328806de65f859dc63fe68b92f70ad10c9992993064e4e91786
|
| 3 |
size 1800
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 264665786
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:897b5043f12ab5b080ec026763f5e58f9947bfeb9d3b5e68e21c2b6493f9e676
|
| 3 |
size 264665786
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:91254c943660a3203eec1d026396102f3e0b80f3a8c66cc2f6add4b242607689
|
| 3 |
+
size 14180
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f013f71783c0dc60fa63541cd9fac53da38b82998b3731a80d7c115cba7f0bf1
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ff477fcb74f9478dd012f656769bb7c4aa62a4bc43e62fb29e6d1b12821b2b70
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"best_global_step":
|
| 3 |
-
"best_metric": 0.
|
| 4 |
-
"best_model_checkpoint": "./qwen3-4b-ar-diffusion-checkpoints/checkpoint-
|
| 5 |
-
"epoch": 2.
|
| 6 |
"eval_steps": 250,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -4738,6 +4738,92 @@
|
|
| 4738 |
"eval_samples_per_second": 22.397,
|
| 4739 |
"eval_steps_per_second": 5.599,
|
| 4740 |
"step": 27500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4741 |
}
|
| 4742 |
],
|
| 4743 |
"logging_steps": 50,
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_global_step": 28000,
|
| 3 |
+
"best_metric": 0.6302720904350281,
|
| 4 |
+
"best_model_checkpoint": "./qwen3-4b-ar-diffusion-checkpoints/checkpoint-28000",
|
| 5 |
+
"epoch": 2.1536804861164525,
|
| 6 |
"eval_steps": 250,
|
| 7 |
+
"global_step": 28000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 4738 |
"eval_samples_per_second": 22.397,
|
| 4739 |
"eval_steps_per_second": 5.599,
|
| 4740 |
"step": 27500
|
| 4741 |
+
},
|
| 4742 |
+
{
|
| 4743 |
+
"epoch": 2.1190677640181526,
|
| 4744 |
+
"grad_norm": 0.9390980005264282,
|
| 4745 |
+
"learning_rate": 5.955899540295562e-05,
|
| 4746 |
+
"loss": 0.627,
|
| 4747 |
+
"step": 27550
|
| 4748 |
+
},
|
| 4749 |
+
{
|
| 4750 |
+
"epoch": 2.1229136220290745,
|
| 4751 |
+
"grad_norm": 1.0318114757537842,
|
| 4752 |
+
"learning_rate": 5.929927538113913e-05,
|
| 4753 |
+
"loss": 0.6328,
|
| 4754 |
+
"step": 27600
|
| 4755 |
+
},
|
| 4756 |
+
{
|
| 4757 |
+
"epoch": 2.126759480039997,
|
| 4758 |
+
"grad_norm": 0.8785481452941895,
|
| 4759 |
+
"learning_rate": 5.903955535932265e-05,
|
| 4760 |
+
"loss": 0.6715,
|
| 4761 |
+
"step": 27650
|
| 4762 |
+
},
|
| 4763 |
+
{
|
| 4764 |
+
"epoch": 2.1306053380509193,
|
| 4765 |
+
"grad_norm": 1.0937212705612183,
|
| 4766 |
+
"learning_rate": 5.877983533750617e-05,
|
| 4767 |
+
"loss": 0.6479,
|
| 4768 |
+
"step": 27700
|
| 4769 |
+
},
|
| 4770 |
+
{
|
| 4771 |
+
"epoch": 2.1344511960618413,
|
| 4772 |
+
"grad_norm": 0.7675368189811707,
|
| 4773 |
+
"learning_rate": 5.852011531568969e-05,
|
| 4774 |
+
"loss": 0.6473,
|
| 4775 |
+
"step": 27750
|
| 4776 |
+
},
|
| 4777 |
+
{
|
| 4778 |
+
"epoch": 2.1344511960618413,
|
| 4779 |
+
"eval_loss": 0.635150134563446,
|
| 4780 |
+
"eval_runtime": 21.3801,
|
| 4781 |
+
"eval_samples_per_second": 23.386,
|
| 4782 |
+
"eval_steps_per_second": 5.847,
|
| 4783 |
+
"step": 27750
|
| 4784 |
+
},
|
| 4785 |
+
{
|
| 4786 |
+
"epoch": 2.1382970540727637,
|
| 4787 |
+
"grad_norm": 1.0143834352493286,
|
| 4788 |
+
"learning_rate": 5.8260395293873204e-05,
|
| 4789 |
+
"loss": 0.6729,
|
| 4790 |
+
"step": 27800
|
| 4791 |
+
},
|
| 4792 |
+
{
|
| 4793 |
+
"epoch": 2.1421429120836857,
|
| 4794 |
+
"grad_norm": 0.970697283744812,
|
| 4795 |
+
"learning_rate": 5.8000675272056724e-05,
|
| 4796 |
+
"loss": 0.63,
|
| 4797 |
+
"step": 27850
|
| 4798 |
+
},
|
| 4799 |
+
{
|
| 4800 |
+
"epoch": 2.145988770094608,
|
| 4801 |
+
"grad_norm": 0.9831034541130066,
|
| 4802 |
+
"learning_rate": 5.774095525024025e-05,
|
| 4803 |
+
"loss": 0.6594,
|
| 4804 |
+
"step": 27900
|
| 4805 |
+
},
|
| 4806 |
+
{
|
| 4807 |
+
"epoch": 2.1498346281055305,
|
| 4808 |
+
"grad_norm": 0.9592450261116028,
|
| 4809 |
+
"learning_rate": 5.7481235228423756e-05,
|
| 4810 |
+
"loss": 0.657,
|
| 4811 |
+
"step": 27950
|
| 4812 |
+
},
|
| 4813 |
+
{
|
| 4814 |
+
"epoch": 2.1536804861164525,
|
| 4815 |
+
"grad_norm": 0.9553030133247375,
|
| 4816 |
+
"learning_rate": 5.722151520660728e-05,
|
| 4817 |
+
"loss": 0.6441,
|
| 4818 |
+
"step": 28000
|
| 4819 |
+
},
|
| 4820 |
+
{
|
| 4821 |
+
"epoch": 2.1536804861164525,
|
| 4822 |
+
"eval_loss": 0.6302720904350281,
|
| 4823 |
+
"eval_runtime": 22.2098,
|
| 4824 |
+
"eval_samples_per_second": 22.513,
|
| 4825 |
+
"eval_steps_per_second": 5.628,
|
| 4826 |
+
"step": 28000
|
| 4827 |
}
|
| 4828 |
],
|
| 4829 |
"logging_steps": 50,
|