rootxhacker committed on
Commit
893260e
·
verified ·
1 Parent(s): 6d038b4

Training in progress, step 4500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:75410142e7d88776abfc3caa1d65963cd8d85da0ff4f4ef2a8275d75bab13fc7
3
  size 36730224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93119a0d9cb064139d255492b711db7396f97fb777f6b21f3047db86160f8da1
3
  size 36730224
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:543307a68c41ab7eab4206e5219ee92650f34cf85fdde0ae16f2041a7d666f3f
3
  size 73588346
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:125ad1b5dfbb7c0433847ba8eef28656d1f2c9a2847dd305ac88da601d14eb44
3
  size 73588346
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6ba374ceeab3c18ecca1a51999f0124017113854b9f36714acb5b3cbd2a2d7ce
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:271210047536fd837cd0fd091596a0f630f6e94e0fa870a34b128422c282005c
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:51fee8eb233312b3e7cebdb2d20739865391ab32cb8dc65d3edc7a1f54a4611d
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c3093554b4b4d0ee00227f47b178cf5d2eae0462d643e6318319b65a22a3843
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c31d829461e38fcd04f37e8cd6d881ff82a519c1416dbfe781c31b6217218abc
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eced77b7b0854293e79cdebb593e6728be6e65566d560277706b561437e8ee74
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 3500,
3
- "best_metric": 4.488556861877441,
4
- "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-3500",
5
- "epoch": 0.30766864087377893,
6
  "eval_steps": 250,
7
- "global_step": 4000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -696,6 +696,92 @@
696
  "eval_samples_per_second": 53.205,
697
  "eval_steps_per_second": 13.301,
698
  "step": 4000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
699
  }
700
  ],
701
  "logging_steps": 50,
 
1
  {
2
+ "best_global_step": 4500,
3
+ "best_metric": 4.479401588439941,
4
+ "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-4500",
5
+ "epoch": 0.3461272209830013,
6
  "eval_steps": 250,
7
+ "global_step": 4500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
696
  "eval_samples_per_second": 53.205,
697
  "eval_steps_per_second": 13.301,
698
  "step": 4000
699
+ },
700
+ {
701
+ "epoch": 0.3115144988847012,
702
+ "grad_norm": 3.854024648666382,
703
+ "learning_rate": 0.00011970074629609205,
704
+ "loss": 4.4989,
705
+ "step": 4050
706
+ },
707
+ {
708
+ "epoch": 0.3153603568956234,
709
+ "grad_norm": 4.0870490074157715,
710
+ "learning_rate": 0.00011944340281607294,
711
+ "loss": 4.3779,
712
+ "step": 4100
713
+ },
714
+ {
715
+ "epoch": 0.31920621490654566,
716
+ "grad_norm": 4.4627251625061035,
717
+ "learning_rate": 0.0001191860593360538,
718
+ "loss": 4.5526,
719
+ "step": 4150
720
+ },
721
+ {
722
+ "epoch": 0.3230520729174679,
723
+ "grad_norm": 7.568991184234619,
724
+ "learning_rate": 0.00011892871585603468,
725
+ "loss": 4.6285,
726
+ "step": 4200
727
+ },
728
+ {
729
+ "epoch": 0.32689793092839015,
730
+ "grad_norm": 4.214425086975098,
731
+ "learning_rate": 0.00011867137237601558,
732
+ "loss": 4.5328,
733
+ "step": 4250
734
+ },
735
+ {
736
+ "epoch": 0.32689793092839015,
737
+ "eval_loss": 4.511099815368652,
738
+ "eval_runtime": 18.7154,
739
+ "eval_samples_per_second": 53.432,
740
+ "eval_steps_per_second": 13.358,
741
+ "step": 4250
742
+ },
743
+ {
744
+ "epoch": 0.33074378893931233,
745
+ "grad_norm": 2.3888497352600098,
746
+ "learning_rate": 0.00011841402889599646,
747
+ "loss": 4.5408,
748
+ "step": 4300
749
+ },
750
+ {
751
+ "epoch": 0.3345896469502346,
752
+ "grad_norm": 3.128143548965454,
753
+ "learning_rate": 0.00011815668541597735,
754
+ "loss": 4.3879,
755
+ "step": 4350
756
+ },
757
+ {
758
+ "epoch": 0.3384355049611568,
759
+ "grad_norm": 4.353067874908447,
760
+ "learning_rate": 0.00011789934193595823,
761
+ "loss": 4.5091,
762
+ "step": 4400
763
+ },
764
+ {
765
+ "epoch": 0.34228136297207906,
766
+ "grad_norm": 4.771759986877441,
767
+ "learning_rate": 0.00011764199845593911,
768
+ "loss": 4.407,
769
+ "step": 4450
770
+ },
771
+ {
772
+ "epoch": 0.3461272209830013,
773
+ "grad_norm": 2.9524829387664795,
774
+ "learning_rate": 0.00011738465497591999,
775
+ "loss": 4.3798,
776
+ "step": 4500
777
+ },
778
+ {
779
+ "epoch": 0.3461272209830013,
780
+ "eval_loss": 4.479401588439941,
781
+ "eval_runtime": 18.8172,
782
+ "eval_samples_per_second": 53.143,
783
+ "eval_steps_per_second": 13.286,
784
+ "step": 4500
785
  }
786
  ],
787
  "logging_steps": 50,