rootxhacker committed on
Commit
ef79c83
·
verified ·
1 Parent(s): 8ba1503

Training in progress, step 22000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a15735cee6a081c034ee0a7775a5bd0e6cd549717f7509e7cac3a604e2b5ece6
3
  size 36730224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7eaab29466400eb2a657f3436b2818198ebc3b9fd87cb963c622b5d81bbf4d9
3
  size 36730224
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1989d46f84770def08bc86fa4f365fd487bca1f5ce4a5f6b3c39adc01e8043b
3
  size 73588346
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55f3c6089dbca62c8b2bebd6faf4edc5b1bb8a8a960c2f857f9b4984ed1a89d0
3
  size 73588346
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:24cdd6f45f9ebce887e06d548bf949bd18ccf52b17999d18752823b6dd7dc03a
3
- size 14308
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:625da12a62ce1ec712e7ea0b4fc06eed36ff20b9634198d1bc6029989e807f8d
3
+ size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1bb9f4faaa4ec0530a4b6c0b900d92b0246f7d7e281fc5a146d4979b5987fc8e
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46c97cfd73b672df8a5afb8d418c975440124b784cf73259cde1c1d1e99e483d
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2c15c154285d68d5fe6fd0358f59f07ef7cc4e4e79592695b393ca4cfdda9f3f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:137a43b60deca6dd3da445bd48fbb2a7c159344a7a642ca673dc22a6f6abb915
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 21000,
3
- "best_metric": 1.467063307762146,
4
- "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-21000",
5
- "epoch": 1.6537189446965619,
6
  "eval_steps": 250,
7
- "global_step": 21500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -3706,6 +3706,92 @@
3706
  "eval_samples_per_second": 55.376,
3707
  "eval_steps_per_second": 13.844,
3708
  "step": 21500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3709
  }
3710
  ],
3711
  "logging_steps": 50,
 
1
  {
2
+ "best_global_step": 22000,
3
+ "best_metric": 1.4664525985717773,
4
+ "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-22000",
5
+ "epoch": 1.6921775248057842,
6
  "eval_steps": 250,
7
+ "global_step": 22000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
3706
  "eval_samples_per_second": 55.376,
3707
  "eval_steps_per_second": 13.844,
3708
  "step": 21500
3709
+ },
3710
+ {
3711
+ "epoch": 1.657564802707484,
3712
+ "grad_norm": 1.2780442237854004,
3713
+ "learning_rate": 2.9661409507003417e-05,
3714
+ "loss": 1.5179,
3715
+ "step": 21550
3716
+ },
3717
+ {
3718
+ "epoch": 1.6614106607184063,
3719
+ "grad_norm": 1.206725835800171,
3720
+ "learning_rate": 2.94040660269843e-05,
3721
+ "loss": 1.4438,
3722
+ "step": 21600
3723
+ },
3724
+ {
3725
+ "epoch": 1.6652565187293287,
3726
+ "grad_norm": 2.1834638118743896,
3727
+ "learning_rate": 2.914672254696518e-05,
3728
+ "loss": 1.4783,
3729
+ "step": 21650
3730
+ },
3731
+ {
3732
+ "epoch": 1.6691023767402506,
3733
+ "grad_norm": 1.5568137168884277,
3734
+ "learning_rate": 2.8889379066946066e-05,
3735
+ "loss": 1.38,
3736
+ "step": 21700
3737
+ },
3738
+ {
3739
+ "epoch": 1.672948234751173,
3740
+ "grad_norm": 1.6938014030456543,
3741
+ "learning_rate": 2.863203558692695e-05,
3742
+ "loss": 1.3754,
3743
+ "step": 21750
3744
+ },
3745
+ {
3746
+ "epoch": 1.672948234751173,
3747
+ "eval_loss": 1.466833472251892,
3748
+ "eval_runtime": 18.1069,
3749
+ "eval_samples_per_second": 55.228,
3750
+ "eval_steps_per_second": 13.807,
3751
+ "step": 21750
3752
+ },
3753
+ {
3754
+ "epoch": 1.6767940927620952,
3755
+ "grad_norm": 1.3192166090011597,
3756
+ "learning_rate": 2.837469210690783e-05,
3757
+ "loss": 1.4388,
3758
+ "step": 21800
3759
+ },
3760
+ {
3761
+ "epoch": 1.6806399507730174,
3762
+ "grad_norm": 2.0135934352874756,
3763
+ "learning_rate": 2.8117348626888716e-05,
3764
+ "loss": 1.429,
3765
+ "step": 21850
3766
+ },
3767
+ {
3768
+ "epoch": 1.6844858087839398,
3769
+ "grad_norm": 1.4457674026489258,
3770
+ "learning_rate": 2.78600051468696e-05,
3771
+ "loss": 1.5154,
3772
+ "step": 21900
3773
+ },
3774
+ {
3775
+ "epoch": 1.6883316667948618,
3776
+ "grad_norm": 1.225411295890808,
3777
+ "learning_rate": 2.760266166685048e-05,
3778
+ "loss": 1.4658,
3779
+ "step": 21950
3780
+ },
3781
+ {
3782
+ "epoch": 1.6921775248057842,
3783
+ "grad_norm": 1.8256678581237793,
3784
+ "learning_rate": 2.7345318186831365e-05,
3785
+ "loss": 1.5004,
3786
+ "step": 22000
3787
+ },
3788
+ {
3789
+ "epoch": 1.6921775248057842,
3790
+ "eval_loss": 1.4664525985717773,
3791
+ "eval_runtime": 18.0331,
3792
+ "eval_samples_per_second": 55.454,
3793
+ "eval_steps_per_second": 13.863,
3794
+ "step": 22000
3795
  }
3796
  ],
3797
  "logging_steps": 50,