rootxhacker commited on
Commit
c38097b
·
verified ·
1 Parent(s): 039c2f6

Training in progress, step 31000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8f72bd5a3c71ee7c0378f6c13844dc64c89949a7e7e1e07f8933751b8df932c7
3
  size 132187888
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c822cadf122b8d81bed076d5b6b6c87adeac04badf1f9b9b9d715859da5b1843
3
  size 132187888
last-checkpoint/ar_diffusion_info.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e85124ad974e9aa6591318b973e27a8c6a21355c73a2b688604b07888ca77bfd
3
  size 1800
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c581a475019fa3a8f30579300dafcbee366ee095e196a5f5274ed005879d33e4
3
  size 1800
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1bfbd83026836fb7e11711c7d54e9b00d7e03835d945121611d08eb2713a80bd
3
  size 264665786
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de5fca106ac94431cf0d18f7ca11bd0da4ea78e43121004b46ea9f6bfa639a81
3
  size 264665786
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7e898de494648db9b26425188387425acd19b2117f233464d12d3be9a3ed8c13
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97ab3280ba738c96067535954dc214f9f14277e63441d3d85f0ccbd573a6d6e3
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:851ff73959206b5bf637351b456e6acc61cd20020d42c449aed8671d33f868f2
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3037fa29c14116fb7f57a7d8f13370ce35ad863cc8cab599d44882849d5d0780
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:52513f6a22920e0a6a384b951f3cbc4c30d39da66d57f762b33f885b2ce6ee0e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed58c2b335b3b36d75acfd3c9f3a4f61a466c2c389eddcb7dec50bfb380a2d25
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 30500,
3
- "best_metric": 0.6086920499801636,
4
- "best_model_checkpoint": "./qwen3-4b-ar-diffusion-checkpoints/checkpoint-30500",
5
- "epoch": 2.3459733866625645,
6
  "eval_steps": 250,
7
- "global_step": 30500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -5254,6 +5254,92 @@
5254
  "eval_samples_per_second": 22.369,
5255
  "eval_steps_per_second": 5.592,
5256
  "step": 30500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5257
  }
5258
  ],
5259
  "logging_steps": 50,
 
1
  {
2
+ "best_global_step": 31000,
3
+ "best_metric": 0.6043956279754639,
4
+ "best_model_checkpoint": "./qwen3-4b-ar-diffusion-checkpoints/checkpoint-31000",
5
+ "epoch": 2.384431966771787,
6
  "eval_steps": 250,
7
+ "global_step": 31000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
5254
  "eval_samples_per_second": 22.369,
5255
  "eval_steps_per_second": 5.592,
5256
  "step": 30500
5257
+ },
5258
+ {
5259
+ "epoch": 2.3498192446734865,
5260
+ "grad_norm": 1.1905983686447144,
5261
+ "learning_rate": 4.39757940939667e-05,
5262
+ "loss": 0.6307,
5263
+ "step": 30550
5264
+ },
5265
+ {
5266
+ "epoch": 2.353665102684409,
5267
+ "grad_norm": 0.7451447248458862,
5268
+ "learning_rate": 4.371607407215023e-05,
5269
+ "loss": 0.6096,
5270
+ "step": 30600
5271
+ },
5272
+ {
5273
+ "epoch": 2.3575109606953313,
5274
+ "grad_norm": 0.852059006690979,
5275
+ "learning_rate": 4.345635405033374e-05,
5276
+ "loss": 0.6403,
5277
+ "step": 30650
5278
+ },
5279
+ {
5280
+ "epoch": 2.3613568187062532,
5281
+ "grad_norm": 0.8270148634910583,
5282
+ "learning_rate": 4.319663402851726e-05,
5283
+ "loss": 0.6366,
5284
+ "step": 30700
5285
+ },
5286
+ {
5287
+ "epoch": 2.3652026767171757,
5288
+ "grad_norm": 0.7992098331451416,
5289
+ "learning_rate": 4.293691400670078e-05,
5290
+ "loss": 0.6164,
5291
+ "step": 30750
5292
+ },
5293
+ {
5294
+ "epoch": 2.3652026767171757,
5295
+ "eval_loss": 0.6099753975868225,
5296
+ "eval_runtime": 21.604,
5297
+ "eval_samples_per_second": 23.144,
5298
+ "eval_steps_per_second": 5.786,
5299
+ "step": 30750
5300
+ },
5301
+ {
5302
+ "epoch": 2.3690485347280976,
5303
+ "grad_norm": 1.0327460765838623,
5304
+ "learning_rate": 4.267719398488429e-05,
5305
+ "loss": 0.6275,
5306
+ "step": 30800
5307
+ },
5308
+ {
5309
+ "epoch": 2.37289439273902,
5310
+ "grad_norm": 1.0831198692321777,
5311
+ "learning_rate": 4.241747396306782e-05,
5312
+ "loss": 0.6225,
5313
+ "step": 30850
5314
+ },
5315
+ {
5316
+ "epoch": 2.3767402507499424,
5317
+ "grad_norm": 0.7838327288627625,
5318
+ "learning_rate": 4.215775394125133e-05,
5319
+ "loss": 0.5987,
5320
+ "step": 30900
5321
+ },
5322
+ {
5323
+ "epoch": 2.3805861087608644,
5324
+ "grad_norm": 0.8668245673179626,
5325
+ "learning_rate": 4.189803391943485e-05,
5326
+ "loss": 0.6321,
5327
+ "step": 30950
5328
+ },
5329
+ {
5330
+ "epoch": 2.384431966771787,
5331
+ "grad_norm": 0.9330748319625854,
5332
+ "learning_rate": 4.163831389761837e-05,
5333
+ "loss": 0.6308,
5334
+ "step": 31000
5335
+ },
5336
+ {
5337
+ "epoch": 2.384431966771787,
5338
+ "eval_loss": 0.6043956279754639,
5339
+ "eval_runtime": 22.3345,
5340
+ "eval_samples_per_second": 22.387,
5341
+ "eval_steps_per_second": 5.597,
5342
+ "step": 31000
5343
  }
5344
  ],
5345
  "logging_steps": 50,