rootxhacker commited on
Commit
c738367
·
verified ·
1 Parent(s): 7bfe2a2

Training in progress, step 28000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ca3d706ea7b8413378d7dd789d26590bb9d27693eb5684dfe2541f57da00801
3
  size 132187888
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9780097950800686c9322dd13a3faf02ad28fadad548e324be714511b2714201
3
  size 132187888
last-checkpoint/ar_diffusion_info.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0984e1e86fb953cdc040e6e6b6d5c2eb1553ed1cba39d443fc3501a2accc58e1
3
  size 1800
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0eeadcb7061b328806de65f859dc63fe68b92f70ad10c9992993064e4e91786
3
  size 1800
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c8ef2f7d956f7fd94db383c3d26daacd3b217c630b4544468381600be1281e1a
3
  size 264665786
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:897b5043f12ab5b080ec026763f5e58f9947bfeb9d3b5e68e21c2b6493f9e676
3
  size 264665786
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5d5328130e1b637ad1c107a76d8bd3082a1841600df89695dbda06f8f0cf0c78
3
- size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91254c943660a3203eec1d026396102f3e0b80f3a8c66cc2f6add4b242607689
3
+ size 14180
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c348e94d192afc431e4d473787707f16140089a0b8e3f1a363efbb0ad68be7b9
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f013f71783c0dc60fa63541cd9fac53da38b82998b3731a80d7c115cba7f0bf1
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:00e4057ddb93d4201ababcbc4c1556bd476cd599690ef79c5880a1b8a05ecfad
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff477fcb74f9478dd012f656769bb7c4aa62a4bc43e62fb29e6d1b12821b2b70
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 27500,
3
- "best_metric": 0.6441511511802673,
4
- "best_model_checkpoint": "./qwen3-4b-ar-diffusion-checkpoints/checkpoint-27500",
5
- "epoch": 2.11522190600723,
6
  "eval_steps": 250,
7
- "global_step": 27500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -4738,6 +4738,92 @@
4738
  "eval_samples_per_second": 22.397,
4739
  "eval_steps_per_second": 5.599,
4740
  "step": 27500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4741
  }
4742
  ],
4743
  "logging_steps": 50,
 
1
  {
2
+ "best_global_step": 28000,
3
+ "best_metric": 0.6302720904350281,
4
+ "best_model_checkpoint": "./qwen3-4b-ar-diffusion-checkpoints/checkpoint-28000",
5
+ "epoch": 2.1536804861164525,
6
  "eval_steps": 250,
7
+ "global_step": 28000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
4738
  "eval_samples_per_second": 22.397,
4739
  "eval_steps_per_second": 5.599,
4740
  "step": 27500
4741
+ },
4742
+ {
4743
+ "epoch": 2.1190677640181526,
4744
+ "grad_norm": 0.9390980005264282,
4745
+ "learning_rate": 5.955899540295562e-05,
4746
+ "loss": 0.627,
4747
+ "step": 27550
4748
+ },
4749
+ {
4750
+ "epoch": 2.1229136220290745,
4751
+ "grad_norm": 1.0318114757537842,
4752
+ "learning_rate": 5.929927538113913e-05,
4753
+ "loss": 0.6328,
4754
+ "step": 27600
4755
+ },
4756
+ {
4757
+ "epoch": 2.126759480039997,
4758
+ "grad_norm": 0.8785481452941895,
4759
+ "learning_rate": 5.903955535932265e-05,
4760
+ "loss": 0.6715,
4761
+ "step": 27650
4762
+ },
4763
+ {
4764
+ "epoch": 2.1306053380509193,
4765
+ "grad_norm": 1.0937212705612183,
4766
+ "learning_rate": 5.877983533750617e-05,
4767
+ "loss": 0.6479,
4768
+ "step": 27700
4769
+ },
4770
+ {
4771
+ "epoch": 2.1344511960618413,
4772
+ "grad_norm": 0.7675368189811707,
4773
+ "learning_rate": 5.852011531568969e-05,
4774
+ "loss": 0.6473,
4775
+ "step": 27750
4776
+ },
4777
+ {
4778
+ "epoch": 2.1344511960618413,
4779
+ "eval_loss": 0.635150134563446,
4780
+ "eval_runtime": 21.3801,
4781
+ "eval_samples_per_second": 23.386,
4782
+ "eval_steps_per_second": 5.847,
4783
+ "step": 27750
4784
+ },
4785
+ {
4786
+ "epoch": 2.1382970540727637,
4787
+ "grad_norm": 1.0143834352493286,
4788
+ "learning_rate": 5.8260395293873204e-05,
4789
+ "loss": 0.6729,
4790
+ "step": 27800
4791
+ },
4792
+ {
4793
+ "epoch": 2.1421429120836857,
4794
+ "grad_norm": 0.970697283744812,
4795
+ "learning_rate": 5.8000675272056724e-05,
4796
+ "loss": 0.63,
4797
+ "step": 27850
4798
+ },
4799
+ {
4800
+ "epoch": 2.145988770094608,
4801
+ "grad_norm": 0.9831034541130066,
4802
+ "learning_rate": 5.774095525024025e-05,
4803
+ "loss": 0.6594,
4804
+ "step": 27900
4805
+ },
4806
+ {
4807
+ "epoch": 2.1498346281055305,
4808
+ "grad_norm": 0.9592450261116028,
4809
+ "learning_rate": 5.7481235228423756e-05,
4810
+ "loss": 0.657,
4811
+ "step": 27950
4812
+ },
4813
+ {
4814
+ "epoch": 2.1536804861164525,
4815
+ "grad_norm": 0.9553030133247375,
4816
+ "learning_rate": 5.722151520660728e-05,
4817
+ "loss": 0.6441,
4818
+ "step": 28000
4819
+ },
4820
+ {
4821
+ "epoch": 2.1536804861164525,
4822
+ "eval_loss": 0.6302720904350281,
4823
+ "eval_runtime": 22.2098,
4824
+ "eval_samples_per_second": 22.513,
4825
+ "eval_steps_per_second": 5.628,
4826
+ "step": 28000
4827
  }
4828
  ],
4829
  "logging_steps": 50,