rootxhacker commited on
Commit
c5cb45b
·
verified ·
1 Parent(s): a08aa68

Training in progress, step 34500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:387eaea2b60677f1bca5f4689e58687d4d68c1a3bd12a633dbaa83e1a472eadb
3
  size 132187888
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:581c31679972bdcfdd93694ec7c812b17d40019a808688f579a943d85463c904
3
  size 132187888
last-checkpoint/ar_diffusion_info.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed09fd0bbee5530eed7299419b0a8bf04c1f5eb432d7d005305a84b7653ae3cf
3
  size 1800
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5748fa82d57453d9a0fe9a462a4395564f717f50912fdd1523a9305b512fa60
3
  size 1800
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:54c02c28239c3fb2bd3b6a0347baac24ca1abcc8f6c7036565e9c2726c285b1e
3
  size 264665786
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf55f9271c729abd81f424026c2d810f5ae513f2dba23347c474641256e114eb
3
  size 264665786
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b6cc7154848c18f975fe3126d040aa5665065e37899e2bed962f5bb2fd23e3c0
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9ce460b1138284b49eaf780f57970d8521e6ca2b85cc1fe914b4f8e171c90fa
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:73b82920d494ad0b9c6b5781afaec447f97db85d9f0641dba380ac45674e29c1
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d90106738b2858e018cd3ec3d64f24c362a10f5e044b671422e6245917f6420f
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2206e3ea9caa91bbe357a176c4a03573c2b47177cf241fe9772382f8b2e0ca8f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b632fa657d89cf9d024b864a979b41e9d4484feec7658b4550443a85b8f54ac3
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 31000,
3
  "best_metric": 0.6043956279754639,
4
  "best_model_checkpoint": "./qwen3-4b-ar-diffusion-checkpoints/checkpoint-31000",
5
- "epoch": 2.615183447427121,
6
  "eval_steps": 250,
7
- "global_step": 34000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -5856,6 +5856,92 @@
5856
  "eval_samples_per_second": 22.562,
5857
  "eval_steps_per_second": 5.64,
5858
  "step": 34000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5859
  }
5860
  ],
5861
  "logging_steps": 50,
 
2
  "best_global_step": 31000,
3
  "best_metric": 0.6043956279754639,
4
  "best_model_checkpoint": "./qwen3-4b-ar-diffusion-checkpoints/checkpoint-31000",
5
+ "epoch": 2.6536420275363435,
6
  "eval_steps": 250,
7
+ "global_step": 34500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
5856
  "eval_samples_per_second": 22.562,
5857
  "eval_steps_per_second": 5.64,
5858
  "step": 34000
5859
+ },
5860
+ {
5861
+ "epoch": 2.619029305438043,
5862
+ "grad_norm": 0.581876814365387,
5863
+ "learning_rate": 2.580578136768564e-05,
5864
+ "loss": 0.6315,
5865
+ "step": 34050
5866
+ },
5867
+ {
5868
+ "epoch": 2.6228751634489655,
5869
+ "grad_norm": 0.8881607055664062,
5870
+ "learning_rate": 2.5546061345869156e-05,
5871
+ "loss": 0.603,
5872
+ "step": 34100
5873
+ },
5874
+ {
5875
+ "epoch": 2.626721021459888,
5876
+ "grad_norm": 0.4935351610183716,
5877
+ "learning_rate": 2.5286341324052672e-05,
5878
+ "loss": 0.6311,
5879
+ "step": 34150
5880
+ },
5881
+ {
5882
+ "epoch": 2.63056687947081,
5883
+ "grad_norm": 0.8172516226768494,
5884
+ "learning_rate": 2.502662130223619e-05,
5885
+ "loss": 0.6029,
5886
+ "step": 34200
5887
+ },
5888
+ {
5889
+ "epoch": 2.6344127374817323,
5890
+ "grad_norm": 0.6940003037452698,
5891
+ "learning_rate": 2.4766901280419708e-05,
5892
+ "loss": 0.6334,
5893
+ "step": 34250
5894
+ },
5895
+ {
5896
+ "epoch": 2.6344127374817323,
5897
+ "eval_loss": 0.6158848404884338,
5898
+ "eval_runtime": 21.2951,
5899
+ "eval_samples_per_second": 23.48,
5900
+ "eval_steps_per_second": 5.87,
5901
+ "step": 34250
5902
+ },
5903
+ {
5904
+ "epoch": 2.6382585954926543,
5905
+ "grad_norm": 0.9116266965866089,
5906
+ "learning_rate": 2.4507181258603227e-05,
5907
+ "loss": 0.6716,
5908
+ "step": 34300
5909
+ },
5910
+ {
5911
+ "epoch": 2.6421044535035767,
5912
+ "grad_norm": 0.8472510576248169,
5913
+ "learning_rate": 2.4247461236786744e-05,
5914
+ "loss": 0.5982,
5915
+ "step": 34350
5916
+ },
5917
+ {
5918
+ "epoch": 2.6459503115144987,
5919
+ "grad_norm": 0.9200981855392456,
5920
+ "learning_rate": 2.3987741214970263e-05,
5921
+ "loss": 0.6111,
5922
+ "step": 34400
5923
+ },
5924
+ {
5925
+ "epoch": 2.649796169525421,
5926
+ "grad_norm": 0.7209369540214539,
5927
+ "learning_rate": 2.3728021193153783e-05,
5928
+ "loss": 0.6498,
5929
+ "step": 34450
5930
+ },
5931
+ {
5932
+ "epoch": 2.6536420275363435,
5933
+ "grad_norm": 1.0441888570785522,
5934
+ "learning_rate": 2.3468301171337302e-05,
5935
+ "loss": 0.5979,
5936
+ "step": 34500
5937
+ },
5938
+ {
5939
+ "epoch": 2.6536420275363435,
5940
+ "eval_loss": 0.6152419447898865,
5941
+ "eval_runtime": 22.3142,
5942
+ "eval_samples_per_second": 22.407,
5943
+ "eval_steps_per_second": 5.602,
5944
+ "step": 34500
5945
  }
5946
  ],
5947
  "logging_steps": 50,