rootxhacker commited on
Commit
80394d6
·
verified ·
1 Parent(s): e7654b9

Training in progress, step 37000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:651010c952c4f2e5438cf065b7526e8b7ed7591516d3bc5f9700bebff456e394
3
  size 132187888
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c9908eb2bd5f9beaa06015a751a042f84d87660bd9118a9d8c6df3afc04ac10
3
  size 132187888
last-checkpoint/ar_diffusion_info.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee0e0adf2d9b4aaacdcafc95dc9f871bf71cf1d2a2f1053d39ef57319d6a1bfe
3
  size 1800
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8df926c54955b4f050345ff87bc95e0eaf9e14e0c202091aed069141a6d8050
3
  size 1800
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:444bcb69ebd6dade1e1ffb612d0702662876875378acdb30d35fc8fc0385d21c
3
  size 264665786
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4baf25384281505a1f4c020627ece1722b2a1cb0bdf59122f0338fb59149157c
3
  size 264665786
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8d399a9966448d8b21883a1ed6c9a0ed112cf6f2c15eec1d90ff9a1b08984a7
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8956824c98764344b0f23bb58a4085e09bf86c1c62227126501658f2249b0da6
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:727a5807be63831a649cf3a788d2dbe9fdce0ad9219d92aa0ebe11b8f8712f61
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8dbf48616e0aa20785358bd8c57fa652f00571ca576de0c652d60cefc5452b44
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0cf91a495853427b5f841ab1be3c35766ad0edbc12ff06e2c15160a47811ed22
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bc995789ac7ace85eec5527f15f9a82c9f1388944ba2d5baa678f54ce3d8943
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 36500,
3
- "best_metric": 0.6020208597183228,
4
- "best_model_checkpoint": "./qwen3-4b-ar-diffusion-checkpoints/checkpoint-36500",
5
- "epoch": 2.8074763479732328,
6
  "eval_steps": 250,
7
- "global_step": 36500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -6286,6 +6286,92 @@
6286
  "eval_samples_per_second": 22.584,
6287
  "eval_steps_per_second": 5.646,
6288
  "step": 36500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6289
  }
6290
  ],
6291
  "logging_steps": 50,
 
1
  {
2
+ "best_global_step": 37000,
3
+ "best_metric": 0.5988173484802246,
4
+ "best_model_checkpoint": "./qwen3-4b-ar-diffusion-checkpoints/checkpoint-37000",
5
+ "epoch": 2.845934928082455,
6
  "eval_steps": 250,
7
+ "global_step": 37000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
6286
  "eval_samples_per_second": 22.584,
6287
  "eval_steps_per_second": 5.646,
6288
  "step": 36500
6289
+ },
6290
+ {
6291
+ "epoch": 2.811322205984155,
6292
+ "grad_norm": 0.9810405969619751,
6293
+ "learning_rate": 1.2824974677297874e-05,
6294
+ "loss": 0.6134,
6295
+ "step": 36550
6296
+ },
6297
+ {
6298
+ "epoch": 2.8151680639950776,
6299
+ "grad_norm": 1.0784183740615845,
6300
+ "learning_rate": 1.2565254655481392e-05,
6301
+ "loss": 0.5578,
6302
+ "step": 36600
6303
+ },
6304
+ {
6305
+ "epoch": 2.8190139220059995,
6306
+ "grad_norm": 1.193577527999878,
6307
+ "learning_rate": 1.230553463366491e-05,
6308
+ "loss": 0.595,
6309
+ "step": 36650
6310
+ },
6311
+ {
6312
+ "epoch": 2.8228597800169215,
6313
+ "grad_norm": 1.293881893157959,
6314
+ "learning_rate": 1.204581461184843e-05,
6315
+ "loss": 0.6137,
6316
+ "step": 36700
6317
+ },
6318
+ {
6319
+ "epoch": 2.826705638027844,
6320
+ "grad_norm": 1.2237833738327026,
6321
+ "learning_rate": 1.1786094590031946e-05,
6322
+ "loss": 0.6168,
6323
+ "step": 36750
6324
+ },
6325
+ {
6326
+ "epoch": 2.826705638027844,
6327
+ "eval_loss": 0.6000112891197205,
6328
+ "eval_runtime": 21.2269,
6329
+ "eval_samples_per_second": 23.555,
6330
+ "eval_steps_per_second": 5.889,
6331
+ "step": 36750
6332
+ },
6333
+ {
6334
+ "epoch": 2.8305514960387663,
6335
+ "grad_norm": 1.132026195526123,
6336
+ "learning_rate": 1.1526374568215465e-05,
6337
+ "loss": 0.603,
6338
+ "step": 36800
6339
+ },
6340
+ {
6341
+ "epoch": 2.8343973540496883,
6342
+ "grad_norm": 0.6755896210670471,
6343
+ "learning_rate": 1.1266654546398983e-05,
6344
+ "loss": 0.6041,
6345
+ "step": 36850
6346
+ },
6347
+ {
6348
+ "epoch": 2.8382432120606107,
6349
+ "grad_norm": 1.1434203386306763,
6350
+ "learning_rate": 1.1006934524582501e-05,
6351
+ "loss": 0.593,
6352
+ "step": 36900
6353
+ },
6354
+ {
6355
+ "epoch": 2.842089070071533,
6356
+ "grad_norm": 0.8664344549179077,
6357
+ "learning_rate": 1.0747214502766019e-05,
6358
+ "loss": 0.6159,
6359
+ "step": 36950
6360
+ },
6361
+ {
6362
+ "epoch": 2.845934928082455,
6363
+ "grad_norm": 1.2732676267623901,
6364
+ "learning_rate": 1.0487494480949537e-05,
6365
+ "loss": 0.6146,
6366
+ "step": 37000
6367
+ },
6368
+ {
6369
+ "epoch": 2.845934928082455,
6370
+ "eval_loss": 0.5988173484802246,
6371
+ "eval_runtime": 22.0557,
6372
+ "eval_samples_per_second": 22.67,
6373
+ "eval_steps_per_second": 5.667,
6374
+ "step": 37000
6375
  }
6376
  ],
6377
  "logging_steps": 50,