rootxhacker commited on
Commit
741cba3
·
verified ·
1 Parent(s): ffcc431

Training in progress, step 33500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c0017e49ae1179cf7a55f9321475e16cc1d09f3892ce98421f7979982523ebb9
3
  size 132187888
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cba8cb77ffe78e2d20b2d0ccc4c9669535c480fcc7dc13618e40879b6569a4ef
3
  size 132187888
last-checkpoint/ar_diffusion_info.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bbd1ae1aad603802c82ed073dbd51ff3ad9656e5ab94f3bf3f7d0f8825c93935
3
  size 1800
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c065148843d8271c381f3d8b1e806505a52caa006aaab9e14474604a503f994
3
  size 1800
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:30ff43ec8181901deebb427ddf6e8551d899dcdbad73bc946f675627d7969b43
3
  size 264665786
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d9ffa1c5d4bb6cb13fd3129b2255256a3ec74888dd1726ff04d1a2ff740b6b3
3
  size 264665786
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e489367499db5673a7d65f2da50fa2212ff1d663a956b395bb78fec25dc26db1
3
- size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eff35bb1a8c46c5468e2039629e000a02a24eb92defc378676def9fc2ee080f9
3
+ size 14308
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:79aee1eff14611f1a5c55aaad748b38158c5fb7de7271250f2ef9599cd3ffe75
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6d920d97680fbe7b80b71b492e592480f373318cca68f37e407be6a777bba52
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:99a95e7d3fa1b2f1e9c1ee3dba6fdbbd55025c6a74911523945c9d063de75830
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff9711516719328bf9804dafd0879b843ab233063e11999f87b9c16f7278b99d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 31000,
3
  "best_metric": 0.6043956279754639,
4
  "best_model_checkpoint": "./qwen3-4b-ar-diffusion-checkpoints/checkpoint-31000",
5
- "epoch": 2.5382662872086765,
6
  "eval_steps": 250,
7
- "global_step": 33000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -5684,6 +5684,92 @@
5684
  "eval_samples_per_second": 22.504,
5685
  "eval_steps_per_second": 5.626,
5686
  "step": 33000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5687
  }
5688
  ],
5689
  "logging_steps": 50,
 
2
  "best_global_step": 31000,
3
  "best_metric": 0.6043956279754639,
4
  "best_model_checkpoint": "./qwen3-4b-ar-diffusion-checkpoints/checkpoint-31000",
5
+ "epoch": 2.576724867317899,
6
  "eval_steps": 250,
7
+ "global_step": 33500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
5684
  "eval_samples_per_second": 22.504,
5685
  "eval_steps_per_second": 5.626,
5686
  "step": 33000
5687
+ },
5688
+ {
5689
+ "epoch": 2.5421121452195985,
5690
+ "grad_norm": 0.7773544192314148,
5691
+ "learning_rate": 3.0994987403578946e-05,
5692
+ "loss": 0.6361,
5693
+ "step": 33050
5694
+ },
5695
+ {
5696
+ "epoch": 2.5459580032305205,
5697
+ "grad_norm": 0.8739262819290161,
5698
+ "learning_rate": 3.073526738176246e-05,
5699
+ "loss": 0.6022,
5700
+ "step": 33100
5701
+ },
5702
+ {
5703
+ "epoch": 2.549803861241443,
5704
+ "grad_norm": 0.9114782214164734,
5705
+ "learning_rate": 3.047554735994598e-05,
5706
+ "loss": 0.6414,
5707
+ "step": 33150
5708
+ },
5709
+ {
5710
+ "epoch": 2.5536497192523653,
5711
+ "grad_norm": 0.878693163394928,
5712
+ "learning_rate": 3.0215827338129498e-05,
5713
+ "loss": 0.6194,
5714
+ "step": 33200
5715
+ },
5716
+ {
5717
+ "epoch": 2.5574955772632872,
5718
+ "grad_norm": 0.9344619512557983,
5719
+ "learning_rate": 2.9956107316313014e-05,
5720
+ "loss": 0.6077,
5721
+ "step": 33250
5722
+ },
5723
+ {
5724
+ "epoch": 2.5574955772632872,
5725
+ "eval_loss": 0.6252104640007019,
5726
+ "eval_runtime": 21.3869,
5727
+ "eval_samples_per_second": 23.379,
5728
+ "eval_steps_per_second": 5.845,
5729
+ "step": 33250
5730
+ },
5731
+ {
5732
+ "epoch": 2.5613414352742097,
5733
+ "grad_norm": 1.3236424922943115,
5734
+ "learning_rate": 2.9696387294496537e-05,
5735
+ "loss": 0.6356,
5736
+ "step": 33300
5737
+ },
5738
+ {
5739
+ "epoch": 2.565187293285132,
5740
+ "grad_norm": 1.0532996654510498,
5741
+ "learning_rate": 2.9436667272680054e-05,
5742
+ "loss": 0.6193,
5743
+ "step": 33350
5744
+ },
5745
+ {
5746
+ "epoch": 2.569033151296054,
5747
+ "grad_norm": 0.8525074124336243,
5748
+ "learning_rate": 2.9176947250863566e-05,
5749
+ "loss": 0.58,
5750
+ "step": 33400
5751
+ },
5752
+ {
5753
+ "epoch": 2.5728790093069764,
5754
+ "grad_norm": 1.3966562747955322,
5755
+ "learning_rate": 2.891722722904709e-05,
5756
+ "loss": 0.6519,
5757
+ "step": 33450
5758
+ },
5759
+ {
5760
+ "epoch": 2.576724867317899,
5761
+ "grad_norm": 0.8174068927764893,
5762
+ "learning_rate": 2.8657507207230606e-05,
5763
+ "loss": 0.5824,
5764
+ "step": 33500
5765
+ },
5766
+ {
5767
+ "epoch": 2.576724867317899,
5768
+ "eval_loss": 0.6210461258888245,
5769
+ "eval_runtime": 22.1944,
5770
+ "eval_samples_per_second": 22.528,
5771
+ "eval_steps_per_second": 5.632,
5772
+ "step": 33500
5773
  }
5774
  ],
5775
  "logging_steps": 50,