Training in progress, step 37000, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 132187888
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9c9908eb2bd5f9beaa06015a751a042f84d87660bd9118a9d8c6df3afc04ac10
|
| 3 |
size 132187888
|
last-checkpoint/ar_diffusion_info.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1800
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a8df926c54955b4f050345ff87bc95e0eaf9e14e0c202091aed069141a6d8050
|
| 3 |
size 1800
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 264665786
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4baf25384281505a1f4c020627ece1722b2a1cb0bdf59122f0338fb59149157c
|
| 3 |
size 264665786
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8956824c98764344b0f23bb58a4085e09bf86c1c62227126501658f2249b0da6
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8dbf48616e0aa20785358bd8c57fa652f00571ca576de0c652d60cefc5452b44
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0bc995789ac7ace85eec5527f15f9a82c9f1388944ba2d5baa678f54ce3d8943
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"best_global_step":
|
| 3 |
-
"best_metric": 0.
|
| 4 |
-
"best_model_checkpoint": "./qwen3-4b-ar-diffusion-checkpoints/checkpoint-
|
| 5 |
-
"epoch": 2.
|
| 6 |
"eval_steps": 250,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -6286,6 +6286,92 @@
|
|
| 6286 |
"eval_samples_per_second": 22.584,
|
| 6287 |
"eval_steps_per_second": 5.646,
|
| 6288 |
"step": 36500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6289 |
}
|
| 6290 |
],
|
| 6291 |
"logging_steps": 50,
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_global_step": 37000,
|
| 3 |
+
"best_metric": 0.5988173484802246,
|
| 4 |
+
"best_model_checkpoint": "./qwen3-4b-ar-diffusion-checkpoints/checkpoint-37000",
|
| 5 |
+
"epoch": 2.845934928082455,
|
| 6 |
"eval_steps": 250,
|
| 7 |
+
"global_step": 37000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 6286 |
"eval_samples_per_second": 22.584,
|
| 6287 |
"eval_steps_per_second": 5.646,
|
| 6288 |
"step": 36500
|
| 6289 |
+
},
|
| 6290 |
+
{
|
| 6291 |
+
"epoch": 2.811322205984155,
|
| 6292 |
+
"grad_norm": 0.9810405969619751,
|
| 6293 |
+
"learning_rate": 1.2824974677297874e-05,
|
| 6294 |
+
"loss": 0.6134,
|
| 6295 |
+
"step": 36550
|
| 6296 |
+
},
|
| 6297 |
+
{
|
| 6298 |
+
"epoch": 2.8151680639950776,
|
| 6299 |
+
"grad_norm": 1.0784183740615845,
|
| 6300 |
+
"learning_rate": 1.2565254655481392e-05,
|
| 6301 |
+
"loss": 0.5578,
|
| 6302 |
+
"step": 36600
|
| 6303 |
+
},
|
| 6304 |
+
{
|
| 6305 |
+
"epoch": 2.8190139220059995,
|
| 6306 |
+
"grad_norm": 1.193577527999878,
|
| 6307 |
+
"learning_rate": 1.230553463366491e-05,
|
| 6308 |
+
"loss": 0.595,
|
| 6309 |
+
"step": 36650
|
| 6310 |
+
},
|
| 6311 |
+
{
|
| 6312 |
+
"epoch": 2.8228597800169215,
|
| 6313 |
+
"grad_norm": 1.293881893157959,
|
| 6314 |
+
"learning_rate": 1.204581461184843e-05,
|
| 6315 |
+
"loss": 0.6137,
|
| 6316 |
+
"step": 36700
|
| 6317 |
+
},
|
| 6318 |
+
{
|
| 6319 |
+
"epoch": 2.826705638027844,
|
| 6320 |
+
"grad_norm": 1.2237833738327026,
|
| 6321 |
+
"learning_rate": 1.1786094590031946e-05,
|
| 6322 |
+
"loss": 0.6168,
|
| 6323 |
+
"step": 36750
|
| 6324 |
+
},
|
| 6325 |
+
{
|
| 6326 |
+
"epoch": 2.826705638027844,
|
| 6327 |
+
"eval_loss": 0.6000112891197205,
|
| 6328 |
+
"eval_runtime": 21.2269,
|
| 6329 |
+
"eval_samples_per_second": 23.555,
|
| 6330 |
+
"eval_steps_per_second": 5.889,
|
| 6331 |
+
"step": 36750
|
| 6332 |
+
},
|
| 6333 |
+
{
|
| 6334 |
+
"epoch": 2.8305514960387663,
|
| 6335 |
+
"grad_norm": 1.132026195526123,
|
| 6336 |
+
"learning_rate": 1.1526374568215465e-05,
|
| 6337 |
+
"loss": 0.603,
|
| 6338 |
+
"step": 36800
|
| 6339 |
+
},
|
| 6340 |
+
{
|
| 6341 |
+
"epoch": 2.8343973540496883,
|
| 6342 |
+
"grad_norm": 0.6755896210670471,
|
| 6343 |
+
"learning_rate": 1.1266654546398983e-05,
|
| 6344 |
+
"loss": 0.6041,
|
| 6345 |
+
"step": 36850
|
| 6346 |
+
},
|
| 6347 |
+
{
|
| 6348 |
+
"epoch": 2.8382432120606107,
|
| 6349 |
+
"grad_norm": 1.1434203386306763,
|
| 6350 |
+
"learning_rate": 1.1006934524582501e-05,
|
| 6351 |
+
"loss": 0.593,
|
| 6352 |
+
"step": 36900
|
| 6353 |
+
},
|
| 6354 |
+
{
|
| 6355 |
+
"epoch": 2.842089070071533,
|
| 6356 |
+
"grad_norm": 0.8664344549179077,
|
| 6357 |
+
"learning_rate": 1.0747214502766019e-05,
|
| 6358 |
+
"loss": 0.6159,
|
| 6359 |
+
"step": 36950
|
| 6360 |
+
},
|
| 6361 |
+
{
|
| 6362 |
+
"epoch": 2.845934928082455,
|
| 6363 |
+
"grad_norm": 1.2732676267623901,
|
| 6364 |
+
"learning_rate": 1.0487494480949537e-05,
|
| 6365 |
+
"loss": 0.6146,
|
| 6366 |
+
"step": 37000
|
| 6367 |
+
},
|
| 6368 |
+
{
|
| 6369 |
+
"epoch": 2.845934928082455,
|
| 6370 |
+
"eval_loss": 0.5988173484802246,
|
| 6371 |
+
"eval_runtime": 22.0557,
|
| 6372 |
+
"eval_samples_per_second": 22.67,
|
| 6373 |
+
"eval_steps_per_second": 5.667,
|
| 6374 |
+
"step": 37000
|
| 6375 |
}
|
| 6376 |
],
|
| 6377 |
"logging_steps": 50,
|