rootxhacker commited on
Commit
8f01352
·
verified ·
1 Parent(s): bd848bb

Training in progress, step 21500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8231b5839b3fd47ad5cce000da3f4e051d7af6920ced1cf4cb54e84597d2b4b4
3
  size 36730224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a15735cee6a081c034ee0a7775a5bd0e6cd549717f7509e7cac3a604e2b5ece6
3
  size 36730224
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed47ddac3643ee93d1379696697c4c75dda7c0901e6d1bb4df3a71e2beb6b586
3
  size 73588346
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1989d46f84770def08bc86fa4f365fd487bca1f5ce4a5f6b3c39adc01e8043b
3
  size 73588346
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c5132bfe30a63a6382081eeb67e311e07e2d98916afbc38ab8f8ba0c010b059
3
- size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24cdd6f45f9ebce887e06d548bf949bd18ccf52b17999d18752823b6dd7dc03a
3
+ size 14308
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2bde6e1dd1720140d45bcd90f5619a65ccf408846b486bcff0e4f949114af183
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1bb9f4faaa4ec0530a4b6c0b900d92b0246f7d7e281fc5a146d4979b5987fc8e
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f258c2638ad619fb6ebcf7d3e37d8ff87a3bfd1b69337c35619ebe6b4477e7f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c15c154285d68d5fe6fd0358f59f07ef7cc4e4e79592695b393ca4cfdda9f3f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 21000,
3
  "best_metric": 1.467063307762146,
4
  "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-21000",
5
- "epoch": 1.6152603645873396,
6
  "eval_steps": 250,
7
- "global_step": 21000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -3620,6 +3620,92 @@
3620
  "eval_samples_per_second": 55.32,
3621
  "eval_steps_per_second": 13.83,
3622
  "step": 21000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3623
  }
3624
  ],
3625
  "logging_steps": 50,
 
2
  "best_global_step": 21000,
3
  "best_metric": 1.467063307762146,
4
  "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-21000",
5
+ "epoch": 1.6537189446965619,
6
  "eval_steps": 250,
7
+ "global_step": 21500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
3620
  "eval_samples_per_second": 55.32,
3621
  "eval_steps_per_second": 13.83,
3622
  "step": 21000
3623
+ },
3624
+ {
3625
+ "epoch": 1.6191062225982615,
3626
+ "grad_norm": 2.016814708709717,
3627
+ "learning_rate": 3.223484430719458e-05,
3628
+ "loss": 1.434,
3629
+ "step": 21050
3630
+ },
3631
+ {
3632
+ "epoch": 1.622952080609184,
3633
+ "grad_norm": 1.5766371488571167,
3634
+ "learning_rate": 3.197750082717547e-05,
3635
+ "loss": 1.4249,
3636
+ "step": 21100
3637
+ },
3638
+ {
3639
+ "epoch": 1.6267979386201061,
3640
+ "grad_norm": 2.3865230083465576,
3641
+ "learning_rate": 3.172015734715635e-05,
3642
+ "loss": 1.6,
3643
+ "step": 21150
3644
+ },
3645
+ {
3646
+ "epoch": 1.6306437966310283,
3647
+ "grad_norm": 1.193731427192688,
3648
+ "learning_rate": 3.1462813867137236e-05,
3649
+ "loss": 1.5674,
3650
+ "step": 21200
3651
+ },
3652
+ {
3653
+ "epoch": 1.6344896546419507,
3654
+ "grad_norm": 1.4854563474655151,
3655
+ "learning_rate": 3.120547038711812e-05,
3656
+ "loss": 1.4788,
3657
+ "step": 21250
3658
+ },
3659
+ {
3660
+ "epoch": 1.6344896546419507,
3661
+ "eval_loss": 1.4725981950759888,
3662
+ "eval_runtime": 18.2185,
3663
+ "eval_samples_per_second": 54.889,
3664
+ "eval_steps_per_second": 13.722,
3665
+ "step": 21250
3666
+ },
3667
+ {
3668
+ "epoch": 1.6383355126528727,
3669
+ "grad_norm": 1.3907707929611206,
3670
+ "learning_rate": 3.0948126907099e-05,
3671
+ "loss": 1.4752,
3672
+ "step": 21300
3673
+ },
3674
+ {
3675
+ "epoch": 1.642181370663795,
3676
+ "grad_norm": 1.5267348289489746,
3677
+ "learning_rate": 3.069078342707988e-05,
3678
+ "loss": 1.4198,
3679
+ "step": 21350
3680
+ },
3681
+ {
3682
+ "epoch": 1.6460272286747173,
3683
+ "grad_norm": 1.2138367891311646,
3684
+ "learning_rate": 3.0433439947060768e-05,
3685
+ "loss": 1.4302,
3686
+ "step": 21400
3687
+ },
3688
+ {
3689
+ "epoch": 1.6498730866856395,
3690
+ "grad_norm": 1.3399436473846436,
3691
+ "learning_rate": 3.017609646704165e-05,
3692
+ "loss": 1.5098,
3693
+ "step": 21450
3694
+ },
3695
+ {
3696
+ "epoch": 1.6537189446965619,
3697
+ "grad_norm": 1.543906569480896,
3698
+ "learning_rate": 2.991875298702253e-05,
3699
+ "loss": 1.4577,
3700
+ "step": 21500
3701
+ },
3702
+ {
3703
+ "epoch": 1.6537189446965619,
3704
+ "eval_loss": 1.475114345550537,
3705
+ "eval_runtime": 18.0585,
3706
+ "eval_samples_per_second": 55.376,
3707
+ "eval_steps_per_second": 13.844,
3708
+ "step": 21500
3709
  }
3710
  ],
3711
  "logging_steps": 50,