rootxhacker commited on
Commit
4bf765b
·
verified ·
1 Parent(s): b118f75

Training in progress, step 29500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f63d64404a064684fdbda3be6790c82213e5012889870b2d9e4cf77a54d9d94
3
  size 132187888
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3d2b56b1909f10ec091b339e19d3deed68b4ef6036485c876749907ab70feae
3
  size 132187888
last-checkpoint/ar_diffusion_info.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e4ef189210e24227c270ab8ae1c43df29bb9a4de77cf6f53f77a67953cd009cb
3
  size 1800
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7f39b7fecf41dc806808cbc1fcb7ebfc548308cac03fe5ff57a6ed111230c19
3
  size 1800
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b8b0ae065786b82411fe6cf483993355053626c4697eb99e68382a645ddf49d
3
  size 264665786
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d18bda7cc478b78a2baff9b2ff268d792c1bfcb109692f6f43cd01c1334af6e4
3
  size 264665786
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:993642485acda165f546ca7e2c94b3614d2a294dacdb0f3665a7c4444f2d0fae
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b564041a05d5dec52405f82824f25abbc3402c3fee815ee33c0e6e880970bde
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c0f1b8b8c05dac4caf0e0e8f3e8fa0d1dd356db027075fed7b90fc2d0a97d25
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d10fe23746f9663211426e22e1f688a86e95134ebca1ba9cc0e90e060038ab25
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f34d6605c96e2830680aa8a7a9e3362d332648b178b366947137c49386617a03
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:424a6fa8dcc89ac1a4c9d7aceae072f365d903e39787d3dc1c38f7e0a9e82f96
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 29000,
3
- "best_metric": 0.6262807250022888,
4
- "best_model_checkpoint": "./qwen3-4b-ar-diffusion-checkpoints/checkpoint-29000",
5
- "epoch": 2.230597646334897,
6
  "eval_steps": 250,
7
- "global_step": 29000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -4996,6 +4996,92 @@
4996
  "eval_samples_per_second": 22.48,
4997
  "eval_steps_per_second": 5.62,
4998
  "step": 29000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4999
  }
5000
  ],
5001
  "logging_steps": 50,
 
1
  {
2
+ "best_global_step": 29500,
3
+ "best_metric": 0.6208207607269287,
4
+ "best_model_checkpoint": "./qwen3-4b-ar-diffusion-checkpoints/checkpoint-29500",
5
+ "epoch": 2.26905622644412,
6
  "eval_steps": 250,
7
+ "global_step": 29500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
4996
  "eval_samples_per_second": 22.48,
4997
  "eval_steps_per_second": 5.62,
4998
  "step": 29000
4999
+ },
5000
+ {
5001
+ "epoch": 2.2344435043458195,
5002
+ "grad_norm": 0.5085247755050659,
5003
+ "learning_rate": 5.176739474846116e-05,
5004
+ "loss": 0.6245,
5005
+ "step": 29050
5006
+ },
5007
+ {
5008
+ "epoch": 2.238289362356742,
5009
+ "grad_norm": 0.43597960472106934,
5010
+ "learning_rate": 5.150767472664469e-05,
5011
+ "loss": 0.6155,
5012
+ "step": 29100
5013
+ },
5014
+ {
5015
+ "epoch": 2.242135220367664,
5016
+ "grad_norm": 0.8605113625526428,
5017
+ "learning_rate": 5.124795470482819e-05,
5018
+ "loss": 0.6298,
5019
+ "step": 29150
5020
+ },
5021
+ {
5022
+ "epoch": 2.2459810783785863,
5023
+ "grad_norm": 0.5974554419517517,
5024
+ "learning_rate": 5.098823468301171e-05,
5025
+ "loss": 0.5789,
5026
+ "step": 29200
5027
+ },
5028
+ {
5029
+ "epoch": 2.2498269363895087,
5030
+ "grad_norm": 0.9765536785125732,
5031
+ "learning_rate": 5.072851466119524e-05,
5032
+ "loss": 0.6074,
5033
+ "step": 29250
5034
+ },
5035
+ {
5036
+ "epoch": 2.2498269363895087,
5037
+ "eval_loss": 0.6241350769996643,
5038
+ "eval_runtime": 21.3208,
5039
+ "eval_samples_per_second": 23.451,
5040
+ "eval_steps_per_second": 5.863,
5041
+ "step": 29250
5042
+ },
5043
+ {
5044
+ "epoch": 2.2536727944004307,
5045
+ "grad_norm": 0.8351141214370728,
5046
+ "learning_rate": 5.0468794639378745e-05,
5047
+ "loss": 0.6125,
5048
+ "step": 29300
5049
+ },
5050
+ {
5051
+ "epoch": 2.257518652411353,
5052
+ "grad_norm": 0.8782539367675781,
5053
+ "learning_rate": 5.020907461756227e-05,
5054
+ "loss": 0.6395,
5055
+ "step": 29350
5056
+ },
5057
+ {
5058
+ "epoch": 2.261364510422275,
5059
+ "grad_norm": 0.5191802978515625,
5060
+ "learning_rate": 4.994935459574579e-05,
5061
+ "loss": 0.6223,
5062
+ "step": 29400
5063
+ },
5064
+ {
5065
+ "epoch": 2.2652103684331975,
5066
+ "grad_norm": 1.0012739896774292,
5067
+ "learning_rate": 4.9689634573929304e-05,
5068
+ "loss": 0.5918,
5069
+ "step": 29450
5070
+ },
5071
+ {
5072
+ "epoch": 2.26905622644412,
5073
+ "grad_norm": 0.9906120300292969,
5074
+ "learning_rate": 4.942991455211283e-05,
5075
+ "loss": 0.6604,
5076
+ "step": 29500
5077
+ },
5078
+ {
5079
+ "epoch": 2.26905622644412,
5080
+ "eval_loss": 0.6208207607269287,
5081
+ "eval_runtime": 22.3662,
5082
+ "eval_samples_per_second": 22.355,
5083
+ "eval_steps_per_second": 5.589,
5084
+ "step": 29500
5085
  }
5086
  ],
5087
  "logging_steps": 50,