rootxhacker commited on
Commit
e20086c
·
verified ·
1 Parent(s): d85d59b

Training in progress, step 30500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c51c95b62fe64237a3070b38732becb17c727e262f2a166cf9b540a95a07ffa4
3
  size 132187888
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f72bd5a3c71ee7c0378f6c13844dc64c89949a7e7e1e07f8933751b8df932c7
3
  size 132187888
last-checkpoint/ar_diffusion_info.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c956420b50cd1beed5b84ba3d1b039f88d0f55421c32ed697563eb48dbe3e467
3
  size 1800
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e85124ad974e9aa6591318b973e27a8c6a21355c73a2b688604b07888ca77bfd
3
  size 1800
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7a6ad13ae7222893577b884130d3ce0bc936a3a1c24d4768f253e6c8a17784ab
3
  size 264665786
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1bfbd83026836fb7e11711c7d54e9b00d7e03835d945121611d08eb2713a80bd
3
  size 264665786
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:072a1f78d2c87d7bd95194d5360ec258a354ea618d79e75e77fefc7224373d1f
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e898de494648db9b26425188387425acd19b2117f233464d12d3be9a3ed8c13
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db92c81e1d1adfede0ee8a02a6941b37c6dd7a52f54da9d0ece7ad64c5439504
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:851ff73959206b5bf637351b456e6acc61cd20020d42c449aed8671d33f868f2
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:961ccdacdfed72cc3f6e549530202fa2bdca81a4b4bb1d566331261ea797f82d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52513f6a22920e0a6a384b951f3cbc4c30d39da66d57f762b33f885b2ce6ee0e
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 30000,
3
- "best_metric": 0.6152763962745667,
4
- "best_model_checkpoint": "./qwen3-4b-ar-diffusion-checkpoints/checkpoint-30000",
5
- "epoch": 2.307514806553342,
6
  "eval_steps": 250,
7
- "global_step": 30000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -5168,6 +5168,92 @@
5168
  "eval_samples_per_second": 22.296,
5169
  "eval_steps_per_second": 5.574,
5170
  "step": 30000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5171
  }
5172
  ],
5173
  "logging_steps": 50,
 
1
  {
2
+ "best_global_step": 30500,
3
+ "best_metric": 0.6086920499801636,
4
+ "best_model_checkpoint": "./qwen3-4b-ar-diffusion-checkpoints/checkpoint-30500",
5
+ "epoch": 2.3459733866625645,
6
  "eval_steps": 250,
7
+ "global_step": 30500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
5168
  "eval_samples_per_second": 22.296,
5169
  "eval_steps_per_second": 5.574,
5170
  "step": 30000
5171
+ },
5172
+ {
5173
+ "epoch": 2.311360664564264,
5174
+ "grad_norm": 0.9155976176261902,
5175
+ "learning_rate": 4.6572994312131526e-05,
5176
+ "loss": 0.6241,
5177
+ "step": 30050
5178
+ },
5179
+ {
5180
+ "epoch": 2.3152065225751866,
5181
+ "grad_norm": 0.861916720867157,
5182
+ "learning_rate": 4.6313274290315045e-05,
5183
+ "loss": 0.6359,
5184
+ "step": 30100
5185
+ },
5186
+ {
5187
+ "epoch": 2.3190523805861085,
5188
+ "grad_norm": 0.9024108052253723,
5189
+ "learning_rate": 4.605355426849856e-05,
5190
+ "loss": 0.6343,
5191
+ "step": 30150
5192
+ },
5193
+ {
5194
+ "epoch": 2.322898238597031,
5195
+ "grad_norm": 1.2562297582626343,
5196
+ "learning_rate": 4.579383424668208e-05,
5197
+ "loss": 0.5956,
5198
+ "step": 30200
5199
+ },
5200
+ {
5201
+ "epoch": 2.3267440966079533,
5202
+ "grad_norm": 0.7998838424682617,
5203
+ "learning_rate": 4.55341142248656e-05,
5204
+ "loss": 0.6162,
5205
+ "step": 30250
5206
+ },
5207
+ {
5208
+ "epoch": 2.3267440966079533,
5209
+ "eval_loss": 0.6139496564865112,
5210
+ "eval_runtime": 21.4503,
5211
+ "eval_samples_per_second": 23.31,
5212
+ "eval_steps_per_second": 5.827,
5213
+ "step": 30250
5214
+ },
5215
+ {
5216
+ "epoch": 2.3305899546188753,
5217
+ "grad_norm": 0.6242274641990662,
5218
+ "learning_rate": 4.527439420304912e-05,
5219
+ "loss": 0.5912,
5220
+ "step": 30300
5221
+ },
5222
+ {
5223
+ "epoch": 2.3344358126297977,
5224
+ "grad_norm": 1.1920520067214966,
5225
+ "learning_rate": 4.5014674181232637e-05,
5226
+ "loss": 0.6258,
5227
+ "step": 30350
5228
+ },
5229
+ {
5230
+ "epoch": 2.33828167064072,
5231
+ "grad_norm": 1.370483160018921,
5232
+ "learning_rate": 4.475495415941615e-05,
5233
+ "loss": 0.6229,
5234
+ "step": 30400
5235
+ },
5236
+ {
5237
+ "epoch": 2.342127528651642,
5238
+ "grad_norm": 1.0605512857437134,
5239
+ "learning_rate": 4.449523413759967e-05,
5240
+ "loss": 0.6621,
5241
+ "step": 30450
5242
+ },
5243
+ {
5244
+ "epoch": 2.3459733866625645,
5245
+ "grad_norm": 0.874275803565979,
5246
+ "learning_rate": 4.423551411578319e-05,
5247
+ "loss": 0.6004,
5248
+ "step": 30500
5249
+ },
5250
+ {
5251
+ "epoch": 2.3459733866625645,
5252
+ "eval_loss": 0.6086920499801636,
5253
+ "eval_runtime": 22.3525,
5254
+ "eval_samples_per_second": 22.369,
5255
+ "eval_steps_per_second": 5.592,
5256
+ "step": 30500
5257
  }
5258
  ],
5259
  "logging_steps": 50,