rootxhacker commited on
Commit
5a48b23
·
verified ·
1 Parent(s): 06b76f8

Training in progress, step 29000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0bfc389ecdd34314f2f612c53571c8e1b47d7411169c05b6e5e9ef64751f9336
3
  size 132187888
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f63d64404a064684fdbda3be6790c82213e5012889870b2d9e4cf77a54d9d94
3
  size 132187888
last-checkpoint/ar_diffusion_info.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e03d21f59c7390814d3683840e8e05a5e87623d8d2c21d59a6e20c5b10100a19
3
  size 1800
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4ef189210e24227c270ab8ae1c43df29bb9a4de77cf6f53f77a67953cd009cb
3
  size 1800
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c90e918c468147f036c1471580e464a3a1b2bd0dab65ebc2790466b8b13eec4
3
  size 264665786
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b8b0ae065786b82411fe6cf483993355053626c4697eb99e68382a645ddf49d
3
  size 264665786
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4e97ab479ccef726fdca432f369bd24340a85bd69660c81814e1117757ffe24e
3
- size 14308
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:993642485acda165f546ca7e2c94b3614d2a294dacdb0f3665a7c4444f2d0fae
3
+ size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4d31641ba54273e3788b2cfec72f598fb159706e78b7db0851c1ea541532286b
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c0f1b8b8c05dac4caf0e0e8f3e8fa0d1dd356db027075fed7b90fc2d0a97d25
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:50474d23052aaa3aaeaaa97d7b77d240e068c7314f12dd1aceb1c97f3be499a2
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f34d6605c96e2830680aa8a7a9e3362d332648b178b366947137c49386617a03
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 28500,
3
- "best_metric": 0.6302415132522583,
4
- "best_model_checkpoint": "./qwen3-4b-ar-diffusion-checkpoints/checkpoint-28500",
5
- "epoch": 2.192139066225675,
6
  "eval_steps": 250,
7
- "global_step": 28500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -4910,6 +4910,92 @@
4910
  "eval_samples_per_second": 22.493,
4911
  "eval_steps_per_second": 5.623,
4912
  "step": 28500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4913
  }
4914
  ],
4915
  "logging_steps": 50,
 
1
  {
2
+ "best_global_step": 29000,
3
+ "best_metric": 0.6262807250022888,
4
+ "best_model_checkpoint": "./qwen3-4b-ar-diffusion-checkpoints/checkpoint-29000",
5
+ "epoch": 2.230597646334897,
6
  "eval_steps": 250,
7
+ "global_step": 29000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
4910
  "eval_samples_per_second": 22.493,
4911
  "eval_steps_per_second": 5.623,
4912
  "step": 28500
4913
+ },
4914
+ {
4915
+ "epoch": 2.195984924236597,
4916
+ "grad_norm": 0.8482813835144043,
4917
+ "learning_rate": 5.4364594966625985e-05,
4918
+ "loss": 0.6012,
4919
+ "step": 28550
4920
+ },
4921
+ {
4922
+ "epoch": 2.1998307822475196,
4923
+ "grad_norm": 0.7037524580955505,
4924
+ "learning_rate": 5.4104874944809504e-05,
4925
+ "loss": 0.6288,
4926
+ "step": 28600
4927
+ },
4928
+ {
4929
+ "epoch": 2.2036766402584416,
4930
+ "grad_norm": 1.0364506244659424,
4931
+ "learning_rate": 5.384515492299301e-05,
4932
+ "loss": 0.6607,
4933
+ "step": 28650
4934
+ },
4935
+ {
4936
+ "epoch": 2.207522498269364,
4937
+ "grad_norm": 1.1424225568771362,
4938
+ "learning_rate": 5.358543490117654e-05,
4939
+ "loss": 0.625,
4940
+ "step": 28700
4941
+ },
4942
+ {
4943
+ "epoch": 2.211368356280286,
4944
+ "grad_norm": 0.5791661143302917,
4945
+ "learning_rate": 5.3325714879360056e-05,
4946
+ "loss": 0.645,
4947
+ "step": 28750
4948
+ },
4949
+ {
4950
+ "epoch": 2.211368356280286,
4951
+ "eval_loss": 0.6294763088226318,
4952
+ "eval_runtime": 21.4089,
4953
+ "eval_samples_per_second": 23.355,
4954
+ "eval_steps_per_second": 5.839,
4955
+ "step": 28750
4956
+ },
4957
+ {
4958
+ "epoch": 2.2152142142912084,
4959
+ "grad_norm": 0.843608021736145,
4960
+ "learning_rate": 5.306599485754357e-05,
4961
+ "loss": 0.6421,
4962
+ "step": 28800
4963
+ },
4964
+ {
4965
+ "epoch": 2.2190600723021308,
4966
+ "grad_norm": 0.5737313628196716,
4967
+ "learning_rate": 5.280627483572709e-05,
4968
+ "loss": 0.6488,
4969
+ "step": 28850
4970
+ },
4971
+ {
4972
+ "epoch": 2.2229059303130527,
4973
+ "grad_norm": 1.0083036422729492,
4974
+ "learning_rate": 5.254655481391061e-05,
4975
+ "loss": 0.6355,
4976
+ "step": 28900
4977
+ },
4978
+ {
4979
+ "epoch": 2.226751788323975,
4980
+ "grad_norm": 0.8519378900527954,
4981
+ "learning_rate": 5.228683479209413e-05,
4982
+ "loss": 0.6291,
4983
+ "step": 28950
4984
+ },
4985
+ {
4986
+ "epoch": 2.230597646334897,
4987
+ "grad_norm": 0.8886232972145081,
4988
+ "learning_rate": 5.202711477027764e-05,
4989
+ "loss": 0.6481,
4990
+ "step": 29000
4991
+ },
4992
+ {
4993
+ "epoch": 2.230597646334897,
4994
+ "eval_loss": 0.6262807250022888,
4995
+ "eval_runtime": 22.2422,
4996
+ "eval_samples_per_second": 22.48,
4997
+ "eval_steps_per_second": 5.62,
4998
+ "step": 29000
4999
  }
5000
  ],
5001
  "logging_steps": 50,