rootxhacker commited on
Commit
52e7c6c
·
verified ·
1 Parent(s): 1632a37

Training in progress, step 37500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c9908eb2bd5f9beaa06015a751a042f84d87660bd9118a9d8c6df3afc04ac10
3
  size 132187888
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d960d4cf1314782ff9fa34b4fa4ff52147a6a3edb83af4b92d07e2bfe2de068c
3
  size 132187888
last-checkpoint/ar_diffusion_info.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a8df926c54955b4f050345ff87bc95e0eaf9e14e0c202091aed069141a6d8050
3
  size 1800
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3c47f9755f99713864a7b4e3ec92efc30fda75d652ffd65ed16182316bd0cf3
3
  size 1800
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4baf25384281505a1f4c020627ece1722b2a1cb0bdf59122f0338fb59149157c
3
  size 264665786
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae4ce6863400bf14083af33a7f6b1eb29b1d7a79f7ec21fe556f80564974288f
3
  size 264665786
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8956824c98764344b0f23bb58a4085e09bf86c1c62227126501658f2249b0da6
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52e415f584d39fd895ae020039e8e625bf0351da63110c6500e3a11320d6621f
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8dbf48616e0aa20785358bd8c57fa652f00571ca576de0c652d60cefc5452b44
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:689e59e066b48be0feeeeb12da6d1787ebe3b841285102ea7503ee5943525726
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0bc995789ac7ace85eec5527f15f9a82c9f1388944ba2d5baa678f54ce3d8943
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23ce717bb1ff7d19ce8b39673c5e006d14b3fec124190d834c88a63ab05da6d0
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 37000,
3
- "best_metric": 0.5988173484802246,
4
- "best_model_checkpoint": "./qwen3-4b-ar-diffusion-checkpoints/checkpoint-37000",
5
- "epoch": 2.845934928082455,
6
  "eval_steps": 250,
7
- "global_step": 37000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -6372,6 +6372,92 @@
6372
  "eval_samples_per_second": 22.67,
6373
  "eval_steps_per_second": 5.667,
6374
  "step": 37000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6375
  }
6376
  ],
6377
  "logging_steps": 50,
 
1
  {
2
+ "best_global_step": 37500,
3
+ "best_metric": 0.595755934715271,
4
+ "best_model_checkpoint": "./qwen3-4b-ar-diffusion-checkpoints/checkpoint-37500",
5
+ "epoch": 2.8843935081916774,
6
  "eval_steps": 250,
7
+ "global_step": 37500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
6372
  "eval_samples_per_second": 22.67,
6373
  "eval_steps_per_second": 5.667,
6374
  "step": 37000
6375
+ },
6376
+ {
6377
+ "epoch": 2.8497807860933775,
6378
+ "grad_norm": 1.1380438804626465,
6379
+ "learning_rate": 1.0227774459133055e-05,
6380
+ "loss": 0.6012,
6381
+ "step": 37050
6382
+ },
6383
+ {
6384
+ "epoch": 2.8536266441043,
6385
+ "grad_norm": 1.128145456314087,
6386
+ "learning_rate": 9.968054437316573e-06,
6387
+ "loss": 0.6357,
6388
+ "step": 37100
6389
+ },
6390
+ {
6391
+ "epoch": 2.857472502115222,
6392
+ "grad_norm": 0.8118870854377747,
6393
+ "learning_rate": 9.70833441550009e-06,
6394
+ "loss": 0.6067,
6395
+ "step": 37150
6396
+ },
6397
+ {
6398
+ "epoch": 2.8613183601261443,
6399
+ "grad_norm": 0.7695690989494324,
6400
+ "learning_rate": 9.448614393683609e-06,
6401
+ "loss": 0.6244,
6402
+ "step": 37200
6403
+ },
6404
+ {
6405
+ "epoch": 2.8651642181370662,
6406
+ "grad_norm": 0.7649264931678772,
6407
+ "learning_rate": 9.188894371867128e-06,
6408
+ "loss": 0.5699,
6409
+ "step": 37250
6410
+ },
6411
+ {
6412
+ "epoch": 2.8651642181370662,
6413
+ "eval_loss": 0.5982791781425476,
6414
+ "eval_runtime": 21.6121,
6415
+ "eval_samples_per_second": 23.135,
6416
+ "eval_steps_per_second": 5.784,
6417
+ "step": 37250
6418
+ },
6419
+ {
6420
+ "epoch": 2.8690100761479886,
6421
+ "grad_norm": 1.078489899635315,
6422
+ "learning_rate": 8.929174350050646e-06,
6423
+ "loss": 0.5912,
6424
+ "step": 37300
6425
+ },
6426
+ {
6427
+ "epoch": 2.8728559341589106,
6428
+ "grad_norm": 0.9969648122787476,
6429
+ "learning_rate": 8.669454328234164e-06,
6430
+ "loss": 0.6355,
6431
+ "step": 37350
6432
+ },
6433
+ {
6434
+ "epoch": 2.876701792169833,
6435
+ "grad_norm": 0.41206106543540955,
6436
+ "learning_rate": 8.409734306417682e-06,
6437
+ "loss": 0.6049,
6438
+ "step": 37400
6439
+ },
6440
+ {
6441
+ "epoch": 2.8805476501807554,
6442
+ "grad_norm": 0.9303924441337585,
6443
+ "learning_rate": 8.1500142846012e-06,
6444
+ "loss": 0.5802,
6445
+ "step": 37450
6446
+ },
6447
+ {
6448
+ "epoch": 2.8843935081916774,
6449
+ "grad_norm": 0.9477717876434326,
6450
+ "learning_rate": 7.890294262784718e-06,
6451
+ "loss": 0.627,
6452
+ "step": 37500
6453
+ },
6454
+ {
6455
+ "epoch": 2.8843935081916774,
6456
+ "eval_loss": 0.595755934715271,
6457
+ "eval_runtime": 22.0699,
6458
+ "eval_samples_per_second": 22.655,
6459
+ "eval_steps_per_second": 5.664,
6460
+ "step": 37500
6461
  }
6462
  ],
6463
  "logging_steps": 50,