rootxhacker commited on
Commit
da063dc
·
verified ·
1 Parent(s): 3ffa2fb

Training in progress, step 30000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c3d2b56b1909f10ec091b339e19d3deed68b4ef6036485c876749907ab70feae
3
  size 132187888
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c51c95b62fe64237a3070b38732becb17c727e262f2a166cf9b540a95a07ffa4
3
  size 132187888
last-checkpoint/ar_diffusion_info.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c7f39b7fecf41dc806808cbc1fcb7ebfc548308cac03fe5ff57a6ed111230c19
3
  size 1800
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c956420b50cd1beed5b84ba3d1b039f88d0f55421c32ed697563eb48dbe3e467
3
  size 1800
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d18bda7cc478b78a2baff9b2ff268d792c1bfcb109692f6f43cd01c1334af6e4
3
  size 264665786
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a6ad13ae7222893577b884130d3ce0bc936a3a1c24d4768f253e6c8a17784ab
3
  size 264665786
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0b564041a05d5dec52405f82824f25abbc3402c3fee815ee33c0e6e880970bde
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:072a1f78d2c87d7bd95194d5360ec258a354ea618d79e75e77fefc7224373d1f
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d10fe23746f9663211426e22e1f688a86e95134ebca1ba9cc0e90e060038ab25
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db92c81e1d1adfede0ee8a02a6941b37c6dd7a52f54da9d0ece7ad64c5439504
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:424a6fa8dcc89ac1a4c9d7aceae072f365d903e39787d3dc1c38f7e0a9e82f96
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:961ccdacdfed72cc3f6e549530202fa2bdca81a4b4bb1d566331261ea797f82d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 29500,
3
- "best_metric": 0.6208207607269287,
4
- "best_model_checkpoint": "./qwen3-4b-ar-diffusion-checkpoints/checkpoint-29500",
5
- "epoch": 2.26905622644412,
6
  "eval_steps": 250,
7
- "global_step": 29500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -5082,6 +5082,92 @@
5082
  "eval_samples_per_second": 22.355,
5083
  "eval_steps_per_second": 5.589,
5084
  "step": 29500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5085
  }
5086
  ],
5087
  "logging_steps": 50,
 
1
  {
2
+ "best_global_step": 30000,
3
+ "best_metric": 0.6152763962745667,
4
+ "best_model_checkpoint": "./qwen3-4b-ar-diffusion-checkpoints/checkpoint-30000",
5
+ "epoch": 2.307514806553342,
6
  "eval_steps": 250,
7
+ "global_step": 30000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
5082
  "eval_samples_per_second": 22.355,
5083
  "eval_steps_per_second": 5.589,
5084
  "step": 29500
5085
+ },
5086
+ {
5087
+ "epoch": 2.272902084455042,
5088
+ "grad_norm": 0.776597797870636,
5089
+ "learning_rate": 4.917019453029634e-05,
5090
+ "loss": 0.6117,
5091
+ "step": 29550
5092
+ },
5093
+ {
5094
+ "epoch": 2.2767479424659642,
5095
+ "grad_norm": 0.71247398853302,
5096
+ "learning_rate": 4.8910474508479856e-05,
5097
+ "loss": 0.6223,
5098
+ "step": 29600
5099
+ },
5100
+ {
5101
+ "epoch": 2.280593800476886,
5102
+ "grad_norm": 1.0202375650405884,
5103
+ "learning_rate": 4.865075448666338e-05,
5104
+ "loss": 0.6364,
5105
+ "step": 29650
5106
+ },
5107
+ {
5108
+ "epoch": 2.2844396584878086,
5109
+ "grad_norm": 0.7959633469581604,
5110
+ "learning_rate": 4.8391034464846895e-05,
5111
+ "loss": 0.6635,
5112
+ "step": 29700
5113
+ },
5114
+ {
5115
+ "epoch": 2.288285516498731,
5116
+ "grad_norm": 0.786555826663971,
5117
+ "learning_rate": 4.8131314443030415e-05,
5118
+ "loss": 0.6214,
5119
+ "step": 29750
5120
+ },
5121
+ {
5122
+ "epoch": 2.288285516498731,
5123
+ "eval_loss": 0.6206551790237427,
5124
+ "eval_runtime": 21.496,
5125
+ "eval_samples_per_second": 23.26,
5126
+ "eval_steps_per_second": 5.815,
5127
+ "step": 29750
5128
+ },
5129
+ {
5130
+ "epoch": 2.292131374509653,
5131
+ "grad_norm": 1.349275827407837,
5132
+ "learning_rate": 4.7871594421213934e-05,
5133
+ "loss": 0.5998,
5134
+ "step": 29800
5135
+ },
5136
+ {
5137
+ "epoch": 2.2959772325205754,
5138
+ "grad_norm": 1.2118281126022339,
5139
+ "learning_rate": 4.761187439939745e-05,
5140
+ "loss": 0.6384,
5141
+ "step": 29850
5142
+ },
5143
+ {
5144
+ "epoch": 2.299823090531498,
5145
+ "grad_norm": 0.6467416286468506,
5146
+ "learning_rate": 4.7352154377580974e-05,
5147
+ "loss": 0.6109,
5148
+ "step": 29900
5149
+ },
5150
+ {
5151
+ "epoch": 2.3036689485424198,
5152
+ "grad_norm": 0.626610279083252,
5153
+ "learning_rate": 4.7092434355764486e-05,
5154
+ "loss": 0.6444,
5155
+ "step": 29950
5156
+ },
5157
+ {
5158
+ "epoch": 2.307514806553342,
5159
+ "grad_norm": 0.8306999206542969,
5160
+ "learning_rate": 4.6832714333948006e-05,
5161
+ "loss": 0.6378,
5162
+ "step": 30000
5163
+ },
5164
+ {
5165
+ "epoch": 2.307514806553342,
5166
+ "eval_loss": 0.6152763962745667,
5167
+ "eval_runtime": 22.4259,
5168
+ "eval_samples_per_second": 22.296,
5169
+ "eval_steps_per_second": 5.574,
5170
+ "step": 30000
5171
  }
5172
  ],
5173
  "logging_steps": 50,