rootxhacker commited on
Commit
7236f73
·
verified ·
1 Parent(s): cae093a

Training in progress, step 31500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c822cadf122b8d81bed076d5b6b6c87adeac04badf1f9b9b9d715859da5b1843
3
  size 132187888
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb1d7e6c9c0b0a239d02e15a2159556c5bd1fcf1e8847f331a9a6e433d20fa5a
3
  size 132187888
last-checkpoint/ar_diffusion_info.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c581a475019fa3a8f30579300dafcbee366ee095e196a5f5274ed005879d33e4
3
  size 1800
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e86b3a858274139b3be961d3af1a11f158dbed04011a2fb0d226fa427bebe93
3
  size 1800
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:de5fca106ac94431cf0d18f7ca11bd0da4ea78e43121004b46ea9f6bfa639a81
3
  size 264665786
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e7c2116295a25f8bb18ac5af56b3b66bcefc07893fdd1dace52696c12337661
3
  size 264665786
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:97ab3280ba738c96067535954dc214f9f14277e63441d3d85f0ccbd573a6d6e3
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79c133c2a3f6f8e5d4540624b02ef0bb23de1d12e242c3f6a4a6fbfc3892c66b
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3037fa29c14116fb7f57a7d8f13370ce35ad863cc8cab599d44882849d5d0780
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8bd5d9ea98bbf4c19a3f4d1081add700e97c16f34e816ad12ce83b81a590f6a
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed58c2b335b3b36d75acfd3c9f3a4f61a466c2c389eddcb7dec50bfb380a2d25
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cc290e356b72b5d6c7ea8116aa7addab4b6ab1041682e12bf6b315f56282c40
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 31000,
3
  "best_metric": 0.6043956279754639,
4
  "best_model_checkpoint": "./qwen3-4b-ar-diffusion-checkpoints/checkpoint-31000",
5
- "epoch": 2.384431966771787,
6
  "eval_steps": 250,
7
- "global_step": 31000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -5340,6 +5340,92 @@
5340
  "eval_samples_per_second": 22.387,
5341
  "eval_steps_per_second": 5.597,
5342
  "step": 31000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5343
  }
5344
  ],
5345
  "logging_steps": 50,
 
2
  "best_global_step": 31000,
3
  "best_metric": 0.6043956279754639,
4
  "best_model_checkpoint": "./qwen3-4b-ar-diffusion-checkpoints/checkpoint-31000",
5
+ "epoch": 2.422890546881009,
6
  "eval_steps": 250,
7
+ "global_step": 31500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
5340
  "eval_samples_per_second": 22.387,
5341
  "eval_steps_per_second": 5.597,
5342
  "step": 31000
5343
+ },
5344
+ {
5345
+ "epoch": 2.3882778247827092,
5346
+ "grad_norm": 0.873634934425354,
5347
+ "learning_rate": 4.1378593875801884e-05,
5348
+ "loss": 0.6262,
5349
+ "step": 31050
5350
+ },
5351
+ {
5352
+ "epoch": 2.392123682793631,
5353
+ "grad_norm": 0.8385778069496155,
5354
+ "learning_rate": 4.1118873853985404e-05,
5355
+ "loss": 0.6317,
5356
+ "step": 31100
5357
+ },
5358
+ {
5359
+ "epoch": 2.3959695408045536,
5360
+ "grad_norm": 1.0967971086502075,
5361
+ "learning_rate": 4.085915383216892e-05,
5362
+ "loss": 0.6406,
5363
+ "step": 31150
5364
+ },
5365
+ {
5366
+ "epoch": 2.3998153988154756,
5367
+ "grad_norm": 0.6314703226089478,
5368
+ "learning_rate": 4.059943381035244e-05,
5369
+ "loss": 0.621,
5370
+ "step": 31200
5371
+ },
5372
+ {
5373
+ "epoch": 2.403661256826398,
5374
+ "grad_norm": 0.8299015164375305,
5375
+ "learning_rate": 4.033971378853596e-05,
5376
+ "loss": 0.626,
5377
+ "step": 31250
5378
+ },
5379
+ {
5380
+ "epoch": 2.403661256826398,
5381
+ "eval_loss": 0.6425282955169678,
5382
+ "eval_runtime": 21.4071,
5383
+ "eval_samples_per_second": 23.357,
5384
+ "eval_steps_per_second": 5.839,
5385
+ "step": 31250
5386
+ },
5387
+ {
5388
+ "epoch": 2.4075071148373204,
5389
+ "grad_norm": 0.6408383846282959,
5390
+ "learning_rate": 4.0079993766719475e-05,
5391
+ "loss": 0.6407,
5392
+ "step": 31300
5393
+ },
5394
+ {
5395
+ "epoch": 2.4113529728482423,
5396
+ "grad_norm": 0.7746095061302185,
5397
+ "learning_rate": 3.9820273744902995e-05,
5398
+ "loss": 0.6294,
5399
+ "step": 31350
5400
+ },
5401
+ {
5402
+ "epoch": 2.4151988308591648,
5403
+ "grad_norm": 1.1451231241226196,
5404
+ "learning_rate": 3.9560553723086515e-05,
5405
+ "loss": 0.6509,
5406
+ "step": 31400
5407
+ },
5408
+ {
5409
+ "epoch": 2.4190446888700867,
5410
+ "grad_norm": 0.6468200087547302,
5411
+ "learning_rate": 3.9300833701270034e-05,
5412
+ "loss": 0.605,
5413
+ "step": 31450
5414
+ },
5415
+ {
5416
+ "epoch": 2.422890546881009,
5417
+ "grad_norm": 1.0352072715759277,
5418
+ "learning_rate": 3.904111367945355e-05,
5419
+ "loss": 0.6356,
5420
+ "step": 31500
5421
+ },
5422
+ {
5423
+ "epoch": 2.422890546881009,
5424
+ "eval_loss": 0.641932487487793,
5425
+ "eval_runtime": 22.3719,
5426
+ "eval_samples_per_second": 22.349,
5427
+ "eval_steps_per_second": 5.587,
5428
+ "step": 31500
5429
  }
5430
  ],
5431
  "logging_steps": 50,