Training in progress, step 36000, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 132187888
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dcfacbeb4f4eddb175b2e0f9ee4f9f5fcd3804eb9c67bebaf63f8ba868cd0c6f
|
| 3 |
size 132187888
|
last-checkpoint/ar_diffusion_info.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1800
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fb54fb35a407195fa0393ccddae444ffa53bbd93544621c32110b554f176ae82
|
| 3 |
size 1800
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 264665786
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d19f177598244598d1f9fc2b6f1af6c8f2b072a1487abb8927d38b4c9181ebf2
|
| 3 |
size 264665786
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7f04bd3a9c50ae82a14c7b2fa792411be370b9b5b60aeb5bfd024183874a6a1d
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:23a098e227a7f13ed947fa28dc0329c514f46192f6bb6910a1d5556540e69df6
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:906c43e30b9edc8f130b3f77317e01f206c6e5f1267c22ca4899b9c5968ba8ca
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": 31000,
|
| 3 |
"best_metric": 0.6043956279754639,
|
| 4 |
"best_model_checkpoint": "./qwen3-4b-ar-diffusion-checkpoints/checkpoint-31000",
|
| 5 |
-
"epoch": 2.
|
| 6 |
"eval_steps": 250,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -6114,6 +6114,92 @@
|
|
| 6114 |
"eval_samples_per_second": 22.538,
|
| 6115 |
"eval_steps_per_second": 5.635,
|
| 6116 |
"step": 35500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6117 |
}
|
| 6118 |
],
|
| 6119 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": 31000,
|
| 3 |
"best_metric": 0.6043956279754639,
|
| 4 |
"best_model_checkpoint": "./qwen3-4b-ar-diffusion-checkpoints/checkpoint-31000",
|
| 5 |
+
"epoch": 2.7690177678640104,
|
| 6 |
"eval_steps": 250,
|
| 7 |
+
"global_step": 36000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 6114 |
"eval_samples_per_second": 22.538,
|
| 6115 |
"eval_steps_per_second": 5.635,
|
| 6116 |
"step": 35500
|
| 6117 |
+
},
|
| 6118 |
+
{
|
| 6119 |
+
"epoch": 2.73440504576571,
|
| 6120 |
+
"grad_norm": 0.6087201237678528,
|
| 6121 |
+
"learning_rate": 1.801937511362751e-05,
|
| 6122 |
+
"loss": 0.5581,
|
| 6123 |
+
"step": 35550
|
| 6124 |
+
},
|
| 6125 |
+
{
|
| 6126 |
+
"epoch": 2.7382509037766325,
|
| 6127 |
+
"grad_norm": 1.1166139841079712,
|
| 6128 |
+
"learning_rate": 1.775965509181103e-05,
|
| 6129 |
+
"loss": 0.581,
|
| 6130 |
+
"step": 35600
|
| 6131 |
+
},
|
| 6132 |
+
{
|
| 6133 |
+
"epoch": 2.742096761787555,
|
| 6134 |
+
"grad_norm": 0.6570024490356445,
|
| 6135 |
+
"learning_rate": 1.7499935069994548e-05,
|
| 6136 |
+
"loss": 0.6358,
|
| 6137 |
+
"step": 35650
|
| 6138 |
+
},
|
| 6139 |
+
{
|
| 6140 |
+
"epoch": 2.745942619798477,
|
| 6141 |
+
"grad_norm": 0.4993269443511963,
|
| 6142 |
+
"learning_rate": 1.7240215048178064e-05,
|
| 6143 |
+
"loss": 0.6199,
|
| 6144 |
+
"step": 35700
|
| 6145 |
+
},
|
| 6146 |
+
{
|
| 6147 |
+
"epoch": 2.7497884778093993,
|
| 6148 |
+
"grad_norm": 1.052513599395752,
|
| 6149 |
+
"learning_rate": 1.6980495026361584e-05,
|
| 6150 |
+
"loss": 0.6046,
|
| 6151 |
+
"step": 35750
|
| 6152 |
+
},
|
| 6153 |
+
{
|
| 6154 |
+
"epoch": 2.7497884778093993,
|
| 6155 |
+
"eval_loss": 0.6056188344955444,
|
| 6156 |
+
"eval_runtime": 21.3371,
|
| 6157 |
+
"eval_samples_per_second": 23.433,
|
| 6158 |
+
"eval_steps_per_second": 5.858,
|
| 6159 |
+
"step": 35750
|
| 6160 |
+
},
|
| 6161 |
+
{
|
| 6162 |
+
"epoch": 2.7536343358203217,
|
| 6163 |
+
"grad_norm": 0.6118621826171875,
|
| 6164 |
+
"learning_rate": 1.67207750045451e-05,
|
| 6165 |
+
"loss": 0.6034,
|
| 6166 |
+
"step": 35800
|
| 6167 |
+
},
|
| 6168 |
+
{
|
| 6169 |
+
"epoch": 2.7574801938312437,
|
| 6170 |
+
"grad_norm": 1.1678482294082642,
|
| 6171 |
+
"learning_rate": 1.646105498272862e-05,
|
| 6172 |
+
"loss": 0.5899,
|
| 6173 |
+
"step": 35850
|
| 6174 |
+
},
|
| 6175 |
+
{
|
| 6176 |
+
"epoch": 2.761326051842166,
|
| 6177 |
+
"grad_norm": 1.0577653646469116,
|
| 6178 |
+
"learning_rate": 1.6201334960912136e-05,
|
| 6179 |
+
"loss": 0.6237,
|
| 6180 |
+
"step": 35900
|
| 6181 |
+
},
|
| 6182 |
+
{
|
| 6183 |
+
"epoch": 2.7651719098530885,
|
| 6184 |
+
"grad_norm": 0.7498691082000732,
|
| 6185 |
+
"learning_rate": 1.5941614939095655e-05,
|
| 6186 |
+
"loss": 0.6013,
|
| 6187 |
+
"step": 35950
|
| 6188 |
+
},
|
| 6189 |
+
{
|
| 6190 |
+
"epoch": 2.7690177678640104,
|
| 6191 |
+
"grad_norm": 1.02476966381073,
|
| 6192 |
+
"learning_rate": 1.5681894917279175e-05,
|
| 6193 |
+
"loss": 0.6265,
|
| 6194 |
+
"step": 36000
|
| 6195 |
+
},
|
| 6196 |
+
{
|
| 6197 |
+
"epoch": 2.7690177678640104,
|
| 6198 |
+
"eval_loss": 0.6054879426956177,
|
| 6199 |
+
"eval_runtime": 22.0853,
|
| 6200 |
+
"eval_samples_per_second": 22.639,
|
| 6201 |
+
"eval_steps_per_second": 5.66,
|
| 6202 |
+
"step": 36000
|
| 6203 |
}
|
| 6204 |
],
|
| 6205 |
"logging_steps": 50,
|