alicegoesdown committed on
Commit
20e6229
·
verified ·
1 Parent(s): fcbfab9

Training in progress, step 1200, checkpoint

Browse files
last-checkpoint/lora_top/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:445dfc8c9a44767a9d29209856d539d1af1c5ffbfea88ff27342cc41a2123780
3
  size 6299784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:544243a4ad2d25dac28345763c4b1a3a8c1739a2bc60868444f3034f5c58a1e6
3
  size 6299784
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:015db8111d5fb0b2719936aa426d41571826aa24cdb6e8099ed8132fa831fed6
3
  size 12623930
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0e5b197894eaf041b41b2f6fb6f957116cc9a9b767ef8d77107f2a93d846965
3
  size 12623930
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:494ad52bdfa5ee43ee076b397c9e670106a4c42645a5460ea78ea0cc6f64f64d
3
  size 14180
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5dc19b6e4aa6d96d209bc3cde10ac40343788cc8e21e98e8b0ad66316abe87b9
3
  size 14180
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:40989f422992086de56b7ab56f6bba81cac547e6785f52be59f14989b370dd0e
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80737f34367bed0f31ec1eeecad8be2c3717a20421ad2a8f693a5747cb780b5c
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 4.179497241973877,
3
- "best_model_checkpoint": "./output/checkpoint-1050",
4
- "epoch": 1.6203703703703702,
5
  "eval_steps": 150,
6
- "global_step": 1050,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -798,6 +798,119 @@
798
  "eval_samples_per_second": 40.024,
799
  "eval_steps_per_second": 40.024,
800
  "step": 1050
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
801
  }
802
  ],
803
  "logging_steps": 10,
@@ -817,7 +930,7 @@
817
  "attributes": {}
818
  }
819
  },
820
- "total_flos": 4032037194940416.0,
821
  "train_batch_size": 16,
822
  "trial_name": null,
823
  "trial_params": null
 
1
  {
2
+ "best_metric": 4.128114223480225,
3
+ "best_model_checkpoint": "./output/checkpoint-1200",
4
+ "epoch": 1.8518518518518519,
5
  "eval_steps": 150,
6
+ "global_step": 1200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
798
  "eval_samples_per_second": 40.024,
799
  "eval_steps_per_second": 40.024,
800
  "step": 1050
801
+ },
802
+ {
803
+ "epoch": 1.6358024691358026,
804
+ "grad_norm": 2.5124268531799316,
805
+ "learning_rate": 9.082436730845992e-05,
806
+ "loss": 4.0174,
807
+ "step": 1060
808
+ },
809
+ {
810
+ "epoch": 1.6512345679012346,
811
+ "grad_norm": 2.7575981616973877,
812
+ "learning_rate": 9.063844375723012e-05,
813
+ "loss": 4.0652,
814
+ "step": 1070
815
+ },
816
+ {
817
+ "epoch": 1.6666666666666665,
818
+ "grad_norm": 2.8286046981811523,
819
+ "learning_rate": 9.045084971874735e-05,
820
+ "loss": 3.9928,
821
+ "step": 1080
822
+ },
823
+ {
824
+ "epoch": 1.682098765432099,
825
+ "grad_norm": 3.8293561935424805,
826
+ "learning_rate": 9.026159290426779e-05,
827
+ "loss": 4.1342,
828
+ "step": 1090
829
+ },
830
+ {
831
+ "epoch": 1.6975308641975309,
832
+ "grad_norm": 3.0728299617767334,
833
+ "learning_rate": 9.007068109339781e-05,
834
+ "loss": 4.0104,
835
+ "step": 1100
836
+ },
837
+ {
838
+ "epoch": 1.7129629629629628,
839
+ "grad_norm": 3.8225083351135254,
840
+ "learning_rate": 8.987812213377421e-05,
841
+ "loss": 4.0686,
842
+ "step": 1110
843
+ },
844
+ {
845
+ "epoch": 1.7283950617283952,
846
+ "grad_norm": 3.5748400688171387,
847
+ "learning_rate": 8.968392394074161e-05,
848
+ "loss": 4.0517,
849
+ "step": 1120
850
+ },
851
+ {
852
+ "epoch": 1.7438271604938271,
853
+ "grad_norm": 3.027132272720337,
854
+ "learning_rate": 8.94880944970271e-05,
855
+ "loss": 4.0448,
856
+ "step": 1130
857
+ },
858
+ {
859
+ "epoch": 1.7592592592592593,
860
+ "grad_norm": 3.1220285892486572,
861
+ "learning_rate": 8.92906418524121e-05,
862
+ "loss": 3.9604,
863
+ "step": 1140
864
+ },
865
+ {
866
+ "epoch": 1.7746913580246915,
867
+ "grad_norm": 2.573742151260376,
868
+ "learning_rate": 8.909157412340148e-05,
869
+ "loss": 4.0411,
870
+ "step": 1150
871
+ },
872
+ {
873
+ "epoch": 1.7901234567901234,
874
+ "grad_norm": 2.7320876121520996,
875
+ "learning_rate": 8.889089949288984e-05,
876
+ "loss": 4.0438,
877
+ "step": 1160
878
+ },
879
+ {
880
+ "epoch": 1.8055555555555556,
881
+ "grad_norm": 3.434393882751465,
882
+ "learning_rate": 8.868862620982532e-05,
883
+ "loss": 4.0672,
884
+ "step": 1170
885
+ },
886
+ {
887
+ "epoch": 1.8209876543209877,
888
+ "grad_norm": 2.5772244930267334,
889
+ "learning_rate": 8.848476258887028e-05,
890
+ "loss": 3.9226,
891
+ "step": 1180
892
+ },
893
+ {
894
+ "epoch": 1.8364197530864197,
895
+ "grad_norm": 4.394677639007568,
896
+ "learning_rate": 8.827931701005971e-05,
897
+ "loss": 3.9469,
898
+ "step": 1190
899
+ },
900
+ {
901
+ "epoch": 1.8518518518518519,
902
+ "grad_norm": 3.7775449752807617,
903
+ "learning_rate": 8.80722979184567e-05,
904
+ "loss": 4.0099,
905
+ "step": 1200
906
+ },
907
+ {
908
+ "epoch": 1.8518518518518519,
909
+ "eval_loss": 4.128114223480225,
910
+ "eval_runtime": 12.8017,
911
+ "eval_samples_per_second": 39.057,
912
+ "eval_steps_per_second": 39.057,
913
+ "step": 1200
914
  }
915
  ],
916
  "logging_steps": 10,
 
930
  "attributes": {}
931
  }
932
  },
933
+ "total_flos": 4599042483142656.0,
934
  "train_batch_size": 16,
935
  "trial_name": null,
936
  "trial_params": null