rootxhacker commited on
Commit
eb32176
·
verified ·
1 Parent(s): a2cf3a9

Training in progress, step 34000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cba8cb77ffe78e2d20b2d0ccc4c9669535c480fcc7dc13618e40879b6569a4ef
3
  size 132187888
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:387eaea2b60677f1bca5f4689e58687d4d68c1a3bd12a633dbaa83e1a472eadb
3
  size 132187888
last-checkpoint/ar_diffusion_info.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c065148843d8271c381f3d8b1e806505a52caa006aaab9e14474604a503f994
3
  size 1800
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed09fd0bbee5530eed7299419b0a8bf04c1f5eb432d7d005305a84b7653ae3cf
3
  size 1800
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d9ffa1c5d4bb6cb13fd3129b2255256a3ec74888dd1726ff04d1a2ff740b6b3
3
  size 264665786
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54c02c28239c3fb2bd3b6a0347baac24ca1abcc8f6c7036565e9c2726c285b1e
3
  size 264665786
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eff35bb1a8c46c5468e2039629e000a02a24eb92defc378676def9fc2ee080f9
3
- size 14308
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6cc7154848c18f975fe3126d040aa5665065e37899e2bed962f5bb2fd23e3c0
3
+ size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a6d920d97680fbe7b80b71b492e592480f373318cca68f37e407be6a777bba52
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73b82920d494ad0b9c6b5781afaec447f97db85d9f0641dba380ac45674e29c1
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff9711516719328bf9804dafd0879b843ab233063e11999f87b9c16f7278b99d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2206e3ea9caa91bbe357a176c4a03573c2b47177cf241fe9772382f8b2e0ca8f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 31000,
3
  "best_metric": 0.6043956279754639,
4
  "best_model_checkpoint": "./qwen3-4b-ar-diffusion-checkpoints/checkpoint-31000",
5
- "epoch": 2.576724867317899,
6
  "eval_steps": 250,
7
- "global_step": 33500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -5770,6 +5770,92 @@
5770
  "eval_samples_per_second": 22.528,
5771
  "eval_steps_per_second": 5.632,
5772
  "step": 33500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5773
  }
5774
  ],
5775
  "logging_steps": 50,
 
2
  "best_global_step": 31000,
3
  "best_metric": 0.6043956279754639,
4
  "best_model_checkpoint": "./qwen3-4b-ar-diffusion-checkpoints/checkpoint-31000",
5
+ "epoch": 2.615183447427121,
6
  "eval_steps": 250,
7
+ "global_step": 34000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
5770
  "eval_samples_per_second": 22.528,
5771
  "eval_steps_per_second": 5.632,
5772
  "step": 33500
5773
+ },
5774
+ {
5775
+ "epoch": 2.580570725328821,
5776
+ "grad_norm": 0.5690587759017944,
5777
+ "learning_rate": 2.8397787185414125e-05,
5778
+ "loss": 0.6188,
5779
+ "step": 33550
5780
+ },
5781
+ {
5782
+ "epoch": 2.5844165833397432,
5783
+ "grad_norm": 0.9739165902137756,
5784
+ "learning_rate": 2.813806716359764e-05,
5785
+ "loss": 0.6377,
5786
+ "step": 33600
5787
+ },
5788
+ {
5789
+ "epoch": 2.588262441350665,
5790
+ "grad_norm": 0.7335163354873657,
5791
+ "learning_rate": 2.788354154221749e-05,
5792
+ "loss": 0.6031,
5793
+ "step": 33650
5794
+ },
5795
+ {
5796
+ "epoch": 2.5921082993615876,
5797
+ "grad_norm": 0.8928486108779907,
5798
+ "learning_rate": 2.7623821520401012e-05,
5799
+ "loss": 0.6042,
5800
+ "step": 33700
5801
+ },
5802
+ {
5803
+ "epoch": 2.5959541573725096,
5804
+ "grad_norm": 0.8350071907043457,
5805
+ "learning_rate": 2.7364101498584525e-05,
5806
+ "loss": 0.6349,
5807
+ "step": 33750
5808
+ },
5809
+ {
5810
+ "epoch": 2.5959541573725096,
5811
+ "eval_loss": 0.6190235018730164,
5812
+ "eval_runtime": 21.555,
5813
+ "eval_samples_per_second": 23.196,
5814
+ "eval_steps_per_second": 5.799,
5815
+ "step": 33750
5816
+ },
5817
+ {
5818
+ "epoch": 2.599800015383432,
5819
+ "grad_norm": 0.8973419070243835,
5820
+ "learning_rate": 2.710438147676804e-05,
5821
+ "loss": 0.6201,
5822
+ "step": 33800
5823
+ },
5824
+ {
5825
+ "epoch": 2.6036458733943544,
5826
+ "grad_norm": 0.9094308018684387,
5827
+ "learning_rate": 2.6844661454951564e-05,
5828
+ "loss": 0.648,
5829
+ "step": 33850
5830
+ },
5831
+ {
5832
+ "epoch": 2.6074917314052763,
5833
+ "grad_norm": 1.1847707033157349,
5834
+ "learning_rate": 2.658494143313508e-05,
5835
+ "loss": 0.6052,
5836
+ "step": 33900
5837
+ },
5838
+ {
5839
+ "epoch": 2.6113375894161988,
5840
+ "grad_norm": 1.0160280466079712,
5841
+ "learning_rate": 2.63252214113186e-05,
5842
+ "loss": 0.5866,
5843
+ "step": 33950
5844
+ },
5845
+ {
5846
+ "epoch": 2.615183447427121,
5847
+ "grad_norm": 0.8359413743019104,
5848
+ "learning_rate": 2.6065501389502116e-05,
5849
+ "loss": 0.6265,
5850
+ "step": 34000
5851
+ },
5852
+ {
5853
+ "epoch": 2.615183447427121,
5854
+ "eval_loss": 0.6169971227645874,
5855
+ "eval_runtime": 22.1614,
5856
+ "eval_samples_per_second": 22.562,
5857
+ "eval_steps_per_second": 5.64,
5858
+ "step": 34000
5859
  }
5860
  ],
5861
  "logging_steps": 50,