rootxhacker commited on
Commit
9742a11
·
verified ·
1 Parent(s): a2f44d2

Training in progress, step 19500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:311747411e2afa5d7eccbe8cecbb8cdb7e90a57933bbd475e13a86444c738348
3
  size 36730224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:879ead83bd2c1fe46b7e68211ea0caa2723b2414cf5d08c8c7b5b29823750c25
3
  size 36730224
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d2f809198f83026b95c49f46554873d5da0e0e71fe0da56c9b70dbf1713b8974
3
  size 73588346
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a6fddc34b7b7d3235b23dac04ebf76b113434f948440bce4f7bae9c4f95c22a
3
  size 73588346
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c57dd490464340c9b3aa212b9d3844b50cfd1f6b44323e804e87459d3ff333e
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa7c19bbb017976e63ac5e9fec3eea0eda0cf5868e0275e6b00fde078e2c850d
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:792eb1c103c6d8002801463d13e95f663702a503555882676ea8b65ffc1a8b9a
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abdb0a20afab4dc0c80b1c5f76a237a0247272f8d00055102fe692e4d58c1f05
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:948224c00888abda491e877156775666f24f3b7dcd8d2d0a471fc44df7d812bd
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:488aaa88c1afe9c9c3279271cc45e297a0d20ece7902b965486e19f7d9811602
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 18750,
3
  "best_metric": 1.4680087566375732,
4
  "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-15500",
5
- "epoch": 1.4614260441504499,
6
  "eval_steps": 250,
7
- "global_step": 19000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -3276,6 +3276,92 @@
3276
  "eval_samples_per_second": 55.67,
3277
  "eval_steps_per_second": 13.918,
3278
  "step": 19000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3279
  }
3280
  ],
3281
  "logging_steps": 50,
 
2
  "best_global_step": 18750,
3
  "best_metric": 1.4680087566375732,
4
  "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-15500",
5
+ "epoch": 1.4998846242596724,
6
  "eval_steps": 250,
7
+ "global_step": 19500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
3276
  "eval_samples_per_second": 55.67,
3277
  "eval_steps_per_second": 13.918,
3278
  "step": 19000
3279
+ },
3280
+ {
3281
+ "epoch": 1.4652719021613723,
3282
+ "grad_norm": 1.3094508647918701,
3283
+ "learning_rate": 4.252343663835888e-05,
3284
+ "loss": 1.4726,
3285
+ "step": 19050
3286
+ },
3287
+ {
3288
+ "epoch": 1.4691177601722945,
3289
+ "grad_norm": 2.1354212760925293,
3290
+ "learning_rate": 4.2266093158339764e-05,
3291
+ "loss": 1.4343,
3292
+ "step": 19100
3293
+ },
3294
+ {
3295
+ "epoch": 1.4729636181832166,
3296
+ "grad_norm": 1.395593523979187,
3297
+ "learning_rate": 4.200874967832065e-05,
3298
+ "loss": 1.4834,
3299
+ "step": 19150
3300
+ },
3301
+ {
3302
+ "epoch": 1.476809476194139,
3303
+ "grad_norm": 0.8917800784111023,
3304
+ "learning_rate": 4.1751406198301524e-05,
3305
+ "loss": 1.4625,
3306
+ "step": 19200
3307
+ },
3308
+ {
3309
+ "epoch": 1.480655334205061,
3310
+ "grad_norm": 2.179772138595581,
3311
+ "learning_rate": 4.149406271828241e-05,
3312
+ "loss": 1.4832,
3313
+ "step": 19250
3314
+ },
3315
+ {
3316
+ "epoch": 1.480655334205061,
3317
+ "eval_loss": 1.480191946029663,
3318
+ "eval_runtime": 17.952,
3319
+ "eval_samples_per_second": 55.704,
3320
+ "eval_steps_per_second": 13.926,
3321
+ "step": 19250
3322
+ },
3323
+ {
3324
+ "epoch": 1.4845011922159834,
3325
+ "grad_norm": 1.3308861255645752,
3326
+ "learning_rate": 4.12367192382633e-05,
3327
+ "loss": 1.4555,
3328
+ "step": 19300
3329
+ },
3330
+ {
3331
+ "epoch": 1.4883470502269056,
3332
+ "grad_norm": 1.6867352724075317,
3333
+ "learning_rate": 4.0979375758244176e-05,
3334
+ "loss": 1.4116,
3335
+ "step": 19350
3336
+ },
3337
+ {
3338
+ "epoch": 1.4921929082378278,
3339
+ "grad_norm": 2.161247491836548,
3340
+ "learning_rate": 4.072203227822506e-05,
3341
+ "loss": 1.4262,
3342
+ "step": 19400
3343
+ },
3344
+ {
3345
+ "epoch": 1.4960387662487502,
3346
+ "grad_norm": 1.717690110206604,
3347
+ "learning_rate": 4.046468879820595e-05,
3348
+ "loss": 1.3896,
3349
+ "step": 19450
3350
+ },
3351
+ {
3352
+ "epoch": 1.4998846242596724,
3353
+ "grad_norm": 1.0118234157562256,
3354
+ "learning_rate": 4.020734531818682e-05,
3355
+ "loss": 1.4503,
3356
+ "step": 19500
3357
+ },
3358
+ {
3359
+ "epoch": 1.4998846242596724,
3360
+ "eval_loss": 1.478628396987915,
3361
+ "eval_runtime": 18.0209,
3362
+ "eval_samples_per_second": 55.491,
3363
+ "eval_steps_per_second": 13.873,
3364
+ "step": 19500
3365
  }
3366
  ],
3367
  "logging_steps": 50,