rootxhacker committed on
Commit
01865eb
·
verified ·
1 Parent(s): 74c45de

Training in progress, step 5000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:93119a0d9cb064139d255492b711db7396f97fb777f6b21f3047db86160f8da1
3
  size 36730224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0c1e03b37e306af29004a56bbc2e3cfe78ac4558d064ded4954032563a506c0
3
  size 36730224
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:125ad1b5dfbb7c0433847ba8eef28656d1f2c9a2847dd305ac88da601d14eb44
3
  size 73588346
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d76b1ed26fb441fb296b2bc10e92b9546db047308ea5940e5ff2ef749dcfb42
3
  size 73588346
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:271210047536fd837cd0fd091596a0f630f6e94e0fa870a34b128422c282005c
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a263bfee7c334e6fc36b6c4750e79a70eeb042f841889c6eb0ea501e4fa6ac73
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c3093554b4b4d0ee00227f47b178cf5d2eae0462d643e6318319b65a22a3843
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1bbcc820499d7de99bfade4a29f6e9516d1e9cb1c8de79befcf1f0d7f2a4da15
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eced77b7b0854293e79cdebb593e6728be6e65566d560277706b561437e8ee74
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7200a3e7cdec9309704535acc69d439ad04e127a63c3c13c5ca0a94f0236ea31
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 4500,
3
- "best_metric": 4.479401588439941,
4
- "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-4500",
5
- "epoch": 0.3461272209830013,
6
  "eval_steps": 250,
7
- "global_step": 4500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -782,6 +782,92 @@
782
  "eval_samples_per_second": 53.143,
783
  "eval_steps_per_second": 13.286,
784
  "step": 4500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
785
  }
786
  ],
787
  "logging_steps": 50,
 
1
  {
2
+ "best_global_step": 5000,
3
+ "best_metric": 4.414160251617432,
4
+ "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-5000",
5
+ "epoch": 0.3845858010922237,
6
  "eval_steps": 250,
7
+ "global_step": 5000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
782
  "eval_samples_per_second": 53.143,
783
  "eval_steps_per_second": 13.286,
784
  "step": 4500
785
+ },
786
+ {
787
+ "epoch": 0.34997307899392355,
788
+ "grad_norm": 4.825377941131592,
789
+ "learning_rate": 0.00011712731149590087,
790
+ "loss": 4.5321,
791
+ "step": 4550
792
+ },
793
+ {
794
+ "epoch": 0.3538189370048458,
795
+ "grad_norm": 3.5786240100860596,
796
+ "learning_rate": 0.00011686996801588176,
797
+ "loss": 4.5819,
798
+ "step": 4600
799
+ },
800
+ {
801
+ "epoch": 0.35766479501576803,
802
+ "grad_norm": 4.445742130279541,
803
+ "learning_rate": 0.00011661262453586264,
804
+ "loss": 4.5954,
805
+ "step": 4650
806
+ },
807
+ {
808
+ "epoch": 0.3615106530266903,
809
+ "grad_norm": 4.670301914215088,
810
+ "learning_rate": 0.00011635528105584354,
811
+ "loss": 4.3381,
812
+ "step": 4700
813
+ },
814
+ {
815
+ "epoch": 0.36535651103761246,
816
+ "grad_norm": 3.0563037395477295,
817
+ "learning_rate": 0.0001160979375758244,
818
+ "loss": 4.4451,
819
+ "step": 4750
820
+ },
821
+ {
822
+ "epoch": 0.36535651103761246,
823
+ "eval_loss": 4.503940582275391,
824
+ "eval_runtime": 19.0274,
825
+ "eval_samples_per_second": 52.556,
826
+ "eval_steps_per_second": 13.139,
827
+ "step": 4750
828
+ },
829
+ {
830
+ "epoch": 0.3692023690485347,
831
+ "grad_norm": 4.921920299530029,
832
+ "learning_rate": 0.00011584059409580528,
833
+ "loss": 4.5505,
834
+ "step": 4800
835
+ },
836
+ {
837
+ "epoch": 0.37304822705945695,
838
+ "grad_norm": 4.440188407897949,
839
+ "learning_rate": 0.00011558325061578617,
840
+ "loss": 4.5339,
841
+ "step": 4850
842
+ },
843
+ {
844
+ "epoch": 0.3768940850703792,
845
+ "grad_norm": 4.123379707336426,
846
+ "learning_rate": 0.00011532590713576705,
847
+ "loss": 4.5001,
848
+ "step": 4900
849
+ },
850
+ {
851
+ "epoch": 0.38073994308130144,
852
+ "grad_norm": 3.6461265087127686,
853
+ "learning_rate": 0.00011506856365574795,
854
+ "loss": 4.4704,
855
+ "step": 4950
856
+ },
857
+ {
858
+ "epoch": 0.3845858010922237,
859
+ "grad_norm": 4.586422443389893,
860
+ "learning_rate": 0.00011481122017572883,
861
+ "loss": 4.5607,
862
+ "step": 5000
863
+ },
864
+ {
865
+ "epoch": 0.3845858010922237,
866
+ "eval_loss": 4.414160251617432,
867
+ "eval_runtime": 18.6554,
868
+ "eval_samples_per_second": 53.604,
869
+ "eval_steps_per_second": 13.401,
870
+ "step": 5000
871
  }
872
  ],
873
  "logging_steps": 50,