Training in progress, step 31500, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 132187888
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cb1d7e6c9c0b0a239d02e15a2159556c5bd1fcf1e8847f331a9a6e433d20fa5a
|
| 3 |
size 132187888
|
last-checkpoint/ar_diffusion_info.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1800
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6e86b3a858274139b3be961d3af1a11f158dbed04011a2fb0d226fa427bebe93
|
| 3 |
size 1800
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 264665786
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9e7c2116295a25f8bb18ac5af56b3b66bcefc07893fdd1dace52696c12337661
|
| 3 |
size 264665786
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:79c133c2a3f6f8e5d4540624b02ef0bb23de1d12e242c3f6a4a6fbfc3892c66b
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f8bd5d9ea98bbf4c19a3f4d1081add700e97c16f34e816ad12ce83b81a590f6a
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3cc290e356b72b5d6c7ea8116aa7addab4b6ab1041682e12bf6b315f56282c40
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": 31000,
|
| 3 |
"best_metric": 0.6043956279754639,
|
| 4 |
"best_model_checkpoint": "./qwen3-4b-ar-diffusion-checkpoints/checkpoint-31000",
|
| 5 |
-
"epoch": 2.
|
| 6 |
"eval_steps": 250,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -5340,6 +5340,92 @@
|
|
| 5340 |
"eval_samples_per_second": 22.387,
|
| 5341 |
"eval_steps_per_second": 5.597,
|
| 5342 |
"step": 31000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5343 |
}
|
| 5344 |
],
|
| 5345 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": 31000,
|
| 3 |
"best_metric": 0.6043956279754639,
|
| 4 |
"best_model_checkpoint": "./qwen3-4b-ar-diffusion-checkpoints/checkpoint-31000",
|
| 5 |
+
"epoch": 2.422890546881009,
|
| 6 |
"eval_steps": 250,
|
| 7 |
+
"global_step": 31500,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 5340 |
"eval_samples_per_second": 22.387,
|
| 5341 |
"eval_steps_per_second": 5.597,
|
| 5342 |
"step": 31000
|
| 5343 |
+
},
|
| 5344 |
+
{
|
| 5345 |
+
"epoch": 2.3882778247827092,
|
| 5346 |
+
"grad_norm": 0.873634934425354,
|
| 5347 |
+
"learning_rate": 4.1378593875801884e-05,
|
| 5348 |
+
"loss": 0.6262,
|
| 5349 |
+
"step": 31050
|
| 5350 |
+
},
|
| 5351 |
+
{
|
| 5352 |
+
"epoch": 2.392123682793631,
|
| 5353 |
+
"grad_norm": 0.8385778069496155,
|
| 5354 |
+
"learning_rate": 4.1118873853985404e-05,
|
| 5355 |
+
"loss": 0.6317,
|
| 5356 |
+
"step": 31100
|
| 5357 |
+
},
|
| 5358 |
+
{
|
| 5359 |
+
"epoch": 2.3959695408045536,
|
| 5360 |
+
"grad_norm": 1.0967971086502075,
|
| 5361 |
+
"learning_rate": 4.085915383216892e-05,
|
| 5362 |
+
"loss": 0.6406,
|
| 5363 |
+
"step": 31150
|
| 5364 |
+
},
|
| 5365 |
+
{
|
| 5366 |
+
"epoch": 2.3998153988154756,
|
| 5367 |
+
"grad_norm": 0.6314703226089478,
|
| 5368 |
+
"learning_rate": 4.059943381035244e-05,
|
| 5369 |
+
"loss": 0.621,
|
| 5370 |
+
"step": 31200
|
| 5371 |
+
},
|
| 5372 |
+
{
|
| 5373 |
+
"epoch": 2.403661256826398,
|
| 5374 |
+
"grad_norm": 0.8299015164375305,
|
| 5375 |
+
"learning_rate": 4.033971378853596e-05,
|
| 5376 |
+
"loss": 0.626,
|
| 5377 |
+
"step": 31250
|
| 5378 |
+
},
|
| 5379 |
+
{
|
| 5380 |
+
"epoch": 2.403661256826398,
|
| 5381 |
+
"eval_loss": 0.6425282955169678,
|
| 5382 |
+
"eval_runtime": 21.4071,
|
| 5383 |
+
"eval_samples_per_second": 23.357,
|
| 5384 |
+
"eval_steps_per_second": 5.839,
|
| 5385 |
+
"step": 31250
|
| 5386 |
+
},
|
| 5387 |
+
{
|
| 5388 |
+
"epoch": 2.4075071148373204,
|
| 5389 |
+
"grad_norm": 0.6408383846282959,
|
| 5390 |
+
"learning_rate": 4.0079993766719475e-05,
|
| 5391 |
+
"loss": 0.6407,
|
| 5392 |
+
"step": 31300
|
| 5393 |
+
},
|
| 5394 |
+
{
|
| 5395 |
+
"epoch": 2.4113529728482423,
|
| 5396 |
+
"grad_norm": 0.7746095061302185,
|
| 5397 |
+
"learning_rate": 3.9820273744902995e-05,
|
| 5398 |
+
"loss": 0.6294,
|
| 5399 |
+
"step": 31350
|
| 5400 |
+
},
|
| 5401 |
+
{
|
| 5402 |
+
"epoch": 2.4151988308591648,
|
| 5403 |
+
"grad_norm": 1.1451231241226196,
|
| 5404 |
+
"learning_rate": 3.9560553723086515e-05,
|
| 5405 |
+
"loss": 0.6509,
|
| 5406 |
+
"step": 31400
|
| 5407 |
+
},
|
| 5408 |
+
{
|
| 5409 |
+
"epoch": 2.4190446888700867,
|
| 5410 |
+
"grad_norm": 0.6468200087547302,
|
| 5411 |
+
"learning_rate": 3.9300833701270034e-05,
|
| 5412 |
+
"loss": 0.605,
|
| 5413 |
+
"step": 31450
|
| 5414 |
+
},
|
| 5415 |
+
{
|
| 5416 |
+
"epoch": 2.422890546881009,
|
| 5417 |
+
"grad_norm": 1.0352072715759277,
|
| 5418 |
+
"learning_rate": 3.904111367945355e-05,
|
| 5419 |
+
"loss": 0.6356,
|
| 5420 |
+
"step": 31500
|
| 5421 |
+
},
|
| 5422 |
+
{
|
| 5423 |
+
"epoch": 2.422890546881009,
|
| 5424 |
+
"eval_loss": 0.641932487487793,
|
| 5425 |
+
"eval_runtime": 22.3719,
|
| 5426 |
+
"eval_samples_per_second": 22.349,
|
| 5427 |
+
"eval_steps_per_second": 5.587,
|
| 5428 |
+
"step": 31500
|
| 5429 |
}
|
| 5430 |
],
|
| 5431 |
"logging_steps": 50,
|