rootxhacker commited on
Commit
8d02919
·
verified ·
1 Parent(s): a89b35e

Training in progress, step 21000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c3fe29335718233e84f1e65e1e50047d21ed0639ab8d8950d51643ac36288179
3
  size 36730224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8231b5839b3fd47ad5cce000da3f4e051d7af6920ced1cf4cb54e84597d2b4b4
3
  size 36730224
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c45065f737f44883104a4351c674ace9478a208428702e003f4cf5e07e96452
3
  size 73588346
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed47ddac3643ee93d1379696697c4c75dda7c0901e6d1bb4df3a71e2beb6b586
3
  size 73588346
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5dbffb5b9d940a96eb51dbe7a35860718ecfeca99437ad8b1b71dbafaefdd573
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c5132bfe30a63a6382081eeb67e311e07e2d98916afbc38ab8f8ba0c010b059
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:007715702cfbd38b6fa4b8b523a7e58a1cfb0651b5c1c69d07c436463e26a8bf
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2bde6e1dd1720140d45bcd90f5619a65ccf408846b486bcff0e4f949114af183
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:780bb1353dd0d4e52605608cc24ae3410777df1acea35ed02a82bdb21fa7ba07
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f258c2638ad619fb6ebcf7d3e37d8ff87a3bfd1b69337c35619ebe6b4477e7f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 20250,
3
- "best_metric": 1.4672300815582275,
4
- "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-15500",
5
- "epoch": 1.576801784478117,
6
  "eval_steps": 250,
7
- "global_step": 20500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -3534,6 +3534,92 @@
3534
  "eval_samples_per_second": 55.433,
3535
  "eval_steps_per_second": 13.858,
3536
  "step": 20500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3537
  }
3538
  ],
3539
  "logging_steps": 50,
 
1
  {
2
+ "best_global_step": 21000,
3
+ "best_metric": 1.467063307762146,
4
+ "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-21000",
5
+ "epoch": 1.6152603645873396,
6
  "eval_steps": 250,
7
+ "global_step": 21000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
3534
  "eval_samples_per_second": 55.433,
3535
  "eval_steps_per_second": 13.858,
3536
  "step": 20500
3537
+ },
3538
+ {
3539
+ "epoch": 1.5806476424890392,
3540
+ "grad_norm": 1.36208176612854,
3541
+ "learning_rate": 3.480827910738575e-05,
3542
+ "loss": 1.3615,
3543
+ "step": 20550
3544
+ },
3545
+ {
3546
+ "epoch": 1.5844935004999616,
3547
+ "grad_norm": 1.6889315843582153,
3548
+ "learning_rate": 3.455093562736664e-05,
3549
+ "loss": 1.4174,
3550
+ "step": 20600
3551
+ },
3552
+ {
3553
+ "epoch": 1.5883393585108838,
3554
+ "grad_norm": 1.2735401391983032,
3555
+ "learning_rate": 3.429359214734752e-05,
3556
+ "loss": 1.4482,
3557
+ "step": 20650
3558
+ },
3559
+ {
3560
+ "epoch": 1.592185216521806,
3561
+ "grad_norm": 1.668188452720642,
3562
+ "learning_rate": 3.4036248667328405e-05,
3563
+ "loss": 1.4193,
3564
+ "step": 20700
3565
+ },
3566
+ {
3567
+ "epoch": 1.5960310745327284,
3568
+ "grad_norm": 1.8626503944396973,
3569
+ "learning_rate": 3.3778905187309284e-05,
3570
+ "loss": 1.4477,
3571
+ "step": 20750
3572
+ },
3573
+ {
3574
+ "epoch": 1.5960310745327284,
3575
+ "eval_loss": 1.4779850244522095,
3576
+ "eval_runtime": 18.0373,
3577
+ "eval_samples_per_second": 55.441,
3578
+ "eval_steps_per_second": 13.86,
3579
+ "step": 20750
3580
+ },
3581
+ {
3582
+ "epoch": 1.5998769325436504,
3583
+ "grad_norm": 1.2189550399780273,
3584
+ "learning_rate": 3.352156170729017e-05,
3585
+ "loss": 1.5325,
3586
+ "step": 20800
3587
+ },
3588
+ {
3589
+ "epoch": 1.6037227905545728,
3590
+ "grad_norm": 2.126854658126831,
3591
+ "learning_rate": 3.326421822727105e-05,
3592
+ "loss": 1.5096,
3593
+ "step": 20850
3594
+ },
3595
+ {
3596
+ "epoch": 1.607568648565495,
3597
+ "grad_norm": 1.7529182434082031,
3598
+ "learning_rate": 3.300687474725194e-05,
3599
+ "loss": 1.4629,
3600
+ "step": 20900
3601
+ },
3602
+ {
3603
+ "epoch": 1.6114145065764172,
3604
+ "grad_norm": 2.2533035278320312,
3605
+ "learning_rate": 3.2749531267232824e-05,
3606
+ "loss": 1.4266,
3607
+ "step": 20950
3608
+ },
3609
+ {
3610
+ "epoch": 1.6152603645873396,
3611
+ "grad_norm": 1.6632803678512573,
3612
+ "learning_rate": 3.24921877872137e-05,
3613
+ "loss": 1.5018,
3614
+ "step": 21000
3615
+ },
3616
+ {
3617
+ "epoch": 1.6152603645873396,
3618
+ "eval_loss": 1.467063307762146,
3619
+ "eval_runtime": 18.0767,
3620
+ "eval_samples_per_second": 55.32,
3621
+ "eval_steps_per_second": 13.83,
3622
+ "step": 21000
3623
  }
3624
  ],
3625
  "logging_steps": 50,