rootxhacker commited on
Commit
5274ae8
·
verified ·
1 Parent(s): f320df9

Training in progress, step 19000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a40b3ec9fac131b936a25a3717ad20559ac04c1beac8d89dd6e2dfe4e69599f5
3
  size 36730224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:311747411e2afa5d7eccbe8cecbb8cdb7e90a57933bbd475e13a86444c738348
3
  size 36730224
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f0c5233fd48a8ddd485f6a2ec03ff9389775661db4a9e073cc9e6578c35f6acc
3
  size 73588346
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2f809198f83026b95c49f46554873d5da0e0e71fe0da56c9b70dbf1713b8974
3
  size 73588346
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a637703e63dd26bf3fc5d45021980dfb8c6c15b36b34b2b812e241dcfa5b3276
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c57dd490464340c9b3aa212b9d3844b50cfd1f6b44323e804e87459d3ff333e
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac82d08ac6c84227c6b48fa89b5e8a7f058764ced7fff8850851393f056e0943
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:792eb1c103c6d8002801463d13e95f663702a503555882676ea8b65ffc1a8b9a
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d1ac76f538d93ba96883051bed95b0b8b1845abfe936b318341f400f4768f02a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:948224c00888abda491e877156775666f24f3b7dcd8d2d0a471fc44df7d812bd
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 16750,
3
- "best_metric": 1.4708431959152222,
4
  "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-15500",
5
- "epoch": 1.4229674640412275,
6
  "eval_steps": 250,
7
- "global_step": 18500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -3190,6 +3190,92 @@
3190
  "eval_samples_per_second": 54.904,
3191
  "eval_steps_per_second": 13.726,
3192
  "step": 18500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3193
  }
3194
  ],
3195
  "logging_steps": 50,
 
1
  {
2
+ "best_global_step": 18750,
3
+ "best_metric": 1.4680087566375732,
4
  "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-15500",
5
+ "epoch": 1.4614260441504499,
6
  "eval_steps": 250,
7
+ "global_step": 19000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
3190
  "eval_samples_per_second": 54.904,
3191
  "eval_steps_per_second": 13.726,
3192
  "step": 18500
3193
+ },
3194
+ {
3195
+ "epoch": 1.42681332205215,
3196
+ "grad_norm": 1.0496591329574585,
3197
+ "learning_rate": 4.5096871438550054e-05,
3198
+ "loss": 1.3723,
3199
+ "step": 18550
3200
+ },
3201
+ {
3202
+ "epoch": 1.4306591800630721,
3203
+ "grad_norm": 1.273758053779602,
3204
+ "learning_rate": 4.483952795853093e-05,
3205
+ "loss": 1.4747,
3206
+ "step": 18600
3207
+ },
3208
+ {
3209
+ "epoch": 1.4345050380739943,
3210
+ "grad_norm": 1.3594483137130737,
3211
+ "learning_rate": 4.458218447851181e-05,
3212
+ "loss": 1.564,
3213
+ "step": 18650
3214
+ },
3215
+ {
3216
+ "epoch": 1.4383508960849165,
3217
+ "grad_norm": 1.773634672164917,
3218
+ "learning_rate": 4.43248409984927e-05,
3219
+ "loss": 1.4344,
3220
+ "step": 18700
3221
+ },
3222
+ {
3223
+ "epoch": 1.4421967540958387,
3224
+ "grad_norm": 0.7939924001693726,
3225
+ "learning_rate": 4.406749751847358e-05,
3226
+ "loss": 1.3798,
3227
+ "step": 18750
3228
+ },
3229
+ {
3230
+ "epoch": 1.4421967540958387,
3231
+ "eval_loss": 1.4680087566375732,
3232
+ "eval_runtime": 18.0287,
3233
+ "eval_samples_per_second": 55.467,
3234
+ "eval_steps_per_second": 13.867,
3235
+ "step": 18750
3236
+ },
3237
+ {
3238
+ "epoch": 1.446042612106761,
3239
+ "grad_norm": 1.4785016775131226,
3240
+ "learning_rate": 4.3810154038454466e-05,
3241
+ "loss": 1.5316,
3242
+ "step": 18800
3243
+ },
3244
+ {
3245
+ "epoch": 1.4498884701176833,
3246
+ "grad_norm": 2.1929142475128174,
3247
+ "learning_rate": 4.355281055843535e-05,
3248
+ "loss": 1.4498,
3249
+ "step": 18850
3250
+ },
3251
+ {
3252
+ "epoch": 1.4537343281286055,
3253
+ "grad_norm": 1.816432237625122,
3254
+ "learning_rate": 4.3295467078416225e-05,
3255
+ "loss": 1.5089,
3256
+ "step": 18900
3257
+ },
3258
+ {
3259
+ "epoch": 1.4575801861395277,
3260
+ "grad_norm": 2.589778423309326,
3261
+ "learning_rate": 4.303812359839711e-05,
3262
+ "loss": 1.4011,
3263
+ "step": 18950
3264
+ },
3265
+ {
3266
+ "epoch": 1.4614260441504499,
3267
+ "grad_norm": 1.6828664541244507,
3268
+ "learning_rate": 4.2780780118378e-05,
3269
+ "loss": 1.3803,
3270
+ "step": 19000
3271
+ },
3272
+ {
3273
+ "epoch": 1.4614260441504499,
3274
+ "eval_loss": 1.4737956523895264,
3275
+ "eval_runtime": 17.9628,
3276
+ "eval_samples_per_second": 55.67,
3277
+ "eval_steps_per_second": 13.918,
3278
+ "step": 19000
3279
  }
3280
  ],
3281
  "logging_steps": 50,