rootxhacker commited on
Commit
93e8c08
·
verified ·
1 Parent(s): c18b385

Training in progress, step 5000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:32736256c22dee581cce726cace4ad49e7d2340e9e0d45e70e795a7c7d75017f
3
  size 36730224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:190f0428e79c75fea98ae401d8541afc938c837a01a15c4bff27851efd1dd5ff
3
  size 36730224
last-checkpoint/ar_diffusion_info.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41608b990c978ab6d8dc1c26793cb750782238b85c3400b76fa4199137903300
3
  size 1544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bc6fe79754f0206b6654ffb1ff34bd91c57fda8a689c690f85d10e6dc833bb2
3
  size 1544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0abbf9e53e670e94b4d6c9f0b871f16259814e74491593da1bf6db0fb9680155
3
  size 73588346
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c76afc96f497ed26d04fa930deb390dd280c8858f2593b0b831d0095f3a4f7e6
3
  size 73588346
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:271210047536fd837cd0fd091596a0f630f6e94e0fa870a34b128422c282005c
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a263bfee7c334e6fc36b6c4750e79a70eeb042f841889c6eb0ea501e4fa6ac73
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:51de76fc3d194041ce1d9d52ff2b3fc00cdee2e0aeb7b6d8e431343a71a85eb4
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f5e01d0dbe696c7f72fcba4635ddf54e7178bc606b00e2f2f7ba88d1189fa69
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da9784da0d0932d47262fa459292b039ea021966742bee090e5b53abad4765f5
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c41e45efc3657786e1c23c51c4aebb48328108f6732539bee7a77cf7b55107d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 3500,
3
- "best_metric": 4.520543098449707,
4
- "best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-3500",
5
- "epoch": 0.3461272209830013,
6
  "eval_steps": 250,
7
- "global_step": 4500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -782,6 +782,92 @@
782
  "eval_samples_per_second": 53.931,
783
  "eval_steps_per_second": 13.483,
784
  "step": 4500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
785
  }
786
  ],
787
  "logging_steps": 50,
 
1
  {
2
+ "best_global_step": 5000,
3
+ "best_metric": 4.465761661529541,
4
+ "best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-5000",
5
+ "epoch": 0.3845858010922237,
6
  "eval_steps": 250,
7
+ "global_step": 5000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
782
  "eval_samples_per_second": 53.931,
783
  "eval_steps_per_second": 13.483,
784
  "step": 4500
785
+ },
786
+ {
787
+ "epoch": 0.34997307899392355,
788
+ "grad_norm": 6.404330730438232,
789
+ "learning_rate": 0.0001790042334363556,
790
+ "loss": 4.5673,
791
+ "step": 4550
792
+ },
793
+ {
794
+ "epoch": 0.3538189370048458,
795
+ "grad_norm": 10.212136268615723,
796
+ "learning_rate": 0.00017874451341453912,
797
+ "loss": 4.6249,
798
+ "step": 4600
799
+ },
800
+ {
801
+ "epoch": 0.35766479501576803,
802
+ "grad_norm": 4.401816368103027,
803
+ "learning_rate": 0.00017848479339272266,
804
+ "loss": 4.6305,
805
+ "step": 4650
806
+ },
807
+ {
808
+ "epoch": 0.3615106530266903,
809
+ "grad_norm": 4.710996150970459,
810
+ "learning_rate": 0.00017822507337090617,
811
+ "loss": 4.3731,
812
+ "step": 4700
813
+ },
814
+ {
815
+ "epoch": 0.36535651103761246,
816
+ "grad_norm": 3.150613307952881,
817
+ "learning_rate": 0.0001779653533490897,
818
+ "loss": 4.4491,
819
+ "step": 4750
820
+ },
821
+ {
822
+ "epoch": 0.36535651103761246,
823
+ "eval_loss": 4.564510345458984,
824
+ "eval_runtime": 18.5575,
825
+ "eval_samples_per_second": 53.886,
826
+ "eval_steps_per_second": 13.472,
827
+ "step": 4750
828
+ },
829
+ {
830
+ "epoch": 0.3692023690485347,
831
+ "grad_norm": 4.828207492828369,
832
+ "learning_rate": 0.0001777056333272732,
833
+ "loss": 4.5923,
834
+ "step": 4800
835
+ },
836
+ {
837
+ "epoch": 0.37304822705945695,
838
+ "grad_norm": 3.780848264694214,
839
+ "learning_rate": 0.0001774459133054567,
840
+ "loss": 4.5544,
841
+ "step": 4850
842
+ },
843
+ {
844
+ "epoch": 0.3768940850703792,
845
+ "grad_norm": 4.04913854598999,
846
+ "learning_rate": 0.00017718619328364023,
847
+ "loss": 4.5271,
848
+ "step": 4900
849
+ },
850
+ {
851
+ "epoch": 0.38073994308130144,
852
+ "grad_norm": 4.097137451171875,
853
+ "learning_rate": 0.00017692647326182377,
854
+ "loss": 4.4929,
855
+ "step": 4950
856
+ },
857
+ {
858
+ "epoch": 0.3845858010922237,
859
+ "grad_norm": 4.65788459777832,
860
+ "learning_rate": 0.00017666675324000728,
861
+ "loss": 4.5888,
862
+ "step": 5000
863
+ },
864
+ {
865
+ "epoch": 0.3845858010922237,
866
+ "eval_loss": 4.465761661529541,
867
+ "eval_runtime": 18.6518,
868
+ "eval_samples_per_second": 53.614,
869
+ "eval_steps_per_second": 13.404,
870
+ "step": 5000
871
  }
872
  ],
873
  "logging_steps": 50,