irishprancer commited on
Commit
a0c57c7
·
verified ·
1 Parent(s): 7f4743f

Training in progress, step 1200, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:74db2edb2bb7d5966ecd238b31e38d161d7744310a78858e222ce71d8abbe6e7
3
  size 1915531480
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d9b30bda357abdba7c56881cb473af624f526c1fa92c929f215a5a1c3ec5bfe
3
  size 1915531480
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1c22f510185b0831925cf2c4bf9605a61ab290fb97c9ac16e83e64c3ecf870cd
3
- size 3761523194
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0626b5e2d2857482f77fe19fa9ab9e0eb0d23c7780df49d50411dd6f20322277
3
+ size 3761523130
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e14813183765fc0f4336c434df24ebab953e59767926fe3ae139a286b91e9fe8
3
  size 14180
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44798913ce397550afc33e1198d628e5e41733da0f10c09d54f8ee2a125aafd1
3
  size 14180
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:727df53dab712245494b1b638425e795c38c402a21cd3cf2e025bb43ae55143e
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a799972fe88eb05dc54d00b44d244a2e51dfd9acb5e73d838820e223c42bb941
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.5633396506309509,
3
  "best_model_checkpoint": "./output/checkpoint-300",
4
- "epoch": 2.9577464788732395,
5
  "eval_steps": 150,
6
- "global_step": 1050,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -798,6 +798,119 @@
798
  "eval_samples_per_second": 7.732,
799
  "eval_steps_per_second": 7.732,
800
  "step": 1050
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
801
  }
802
  ],
803
  "logging_steps": 10,
@@ -817,7 +930,7 @@
817
  "attributes": {}
818
  }
819
  },
820
- "total_flos": 1.50702459664171e+17,
821
  "train_batch_size": 8,
822
  "trial_name": null,
823
  "trial_params": null
 
1
  {
2
  "best_metric": 0.5633396506309509,
3
  "best_model_checkpoint": "./output/checkpoint-300",
4
+ "epoch": 3.380281690140845,
5
  "eval_steps": 150,
6
+ "global_step": 1200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
798
  "eval_samples_per_second": 7.732,
799
  "eval_steps_per_second": 7.732,
800
  "step": 1050
801
+ },
802
+ {
803
+ "epoch": 2.9859154929577465,
804
+ "grad_norm": 6.66109561920166,
805
+ "learning_rate": 1.505165945489438e-05,
806
+ "loss": 0.306,
807
+ "step": 1060
808
+ },
809
+ {
810
+ "epoch": 3.0140845070422535,
811
+ "grad_norm": 5.485683441162109,
812
+ "learning_rate": 1.5020847701829796e-05,
813
+ "loss": 0.2564,
814
+ "step": 1070
815
+ },
816
+ {
817
+ "epoch": 3.0422535211267605,
818
+ "grad_norm": 5.308038234710693,
819
+ "learning_rate": 1.4989759111105884e-05,
820
+ "loss": 0.1924,
821
+ "step": 1080
822
+ },
823
+ {
824
+ "epoch": 3.0704225352112675,
825
+ "grad_norm": 4.971621513366699,
826
+ "learning_rate": 1.4958394960652847e-05,
827
+ "loss": 0.2002,
828
+ "step": 1090
829
+ },
830
+ {
831
+ "epoch": 3.0985915492957745,
832
+ "grad_norm": 7.283688068389893,
833
+ "learning_rate": 1.4926756539728063e-05,
834
+ "loss": 0.1983,
835
+ "step": 1100
836
+ },
837
+ {
838
+ "epoch": 3.1267605633802815,
839
+ "grad_norm": 7.20900297164917,
840
+ "learning_rate": 1.4894845148863101e-05,
841
+ "loss": 0.2034,
842
+ "step": 1110
843
+ },
844
+ {
845
+ "epoch": 3.1549295774647885,
846
+ "grad_norm": 6.557058334350586,
847
+ "learning_rate": 1.4862662099810248e-05,
848
+ "loss": 0.2055,
849
+ "step": 1120
850
+ },
851
+ {
852
+ "epoch": 3.183098591549296,
853
+ "grad_norm": 7.073098182678223,
854
+ "learning_rate": 1.4830208715488596e-05,
855
+ "loss": 0.2159,
856
+ "step": 1130
857
+ },
858
+ {
859
+ "epoch": 3.211267605633803,
860
+ "grad_norm": 5.674464702606201,
861
+ "learning_rate": 1.4797486329929669e-05,
862
+ "loss": 0.2116,
863
+ "step": 1140
864
+ },
865
+ {
866
+ "epoch": 3.23943661971831,
867
+ "grad_norm": 5.907012939453125,
868
+ "learning_rate": 1.4764496288222567e-05,
869
+ "loss": 0.2048,
870
+ "step": 1150
871
+ },
872
+ {
873
+ "epoch": 3.267605633802817,
874
+ "grad_norm": 6.804496765136719,
875
+ "learning_rate": 1.4731239946458692e-05,
876
+ "loss": 0.2055,
877
+ "step": 1160
878
+ },
879
+ {
880
+ "epoch": 3.295774647887324,
881
+ "grad_norm": 6.147552013397217,
882
+ "learning_rate": 1.4697718671676005e-05,
883
+ "loss": 0.1937,
884
+ "step": 1170
885
+ },
886
+ {
887
+ "epoch": 3.323943661971831,
888
+ "grad_norm": 7.037655353546143,
889
+ "learning_rate": 1.4663933841802809e-05,
890
+ "loss": 0.2237,
891
+ "step": 1180
892
+ },
893
+ {
894
+ "epoch": 3.352112676056338,
895
+ "grad_norm": 6.545534133911133,
896
+ "learning_rate": 1.4629886845601138e-05,
897
+ "loss": 0.2172,
898
+ "step": 1190
899
+ },
900
+ {
901
+ "epoch": 3.380281690140845,
902
+ "grad_norm": 7.699334621429443,
903
+ "learning_rate": 1.4595579082609653e-05,
904
+ "loss": 0.2238,
905
+ "step": 1200
906
+ },
907
+ {
908
+ "epoch": 3.380281690140845,
909
+ "eval_loss": 0.6862347722053528,
910
+ "eval_runtime": 37.5485,
911
+ "eval_samples_per_second": 8.389,
912
+ "eval_steps_per_second": 8.389,
913
+ "step": 1200
914
  }
915
  ],
916
  "logging_steps": 10,
 
930
  "attributes": {}
931
  }
932
  },
933
+ "total_flos": 1.7258996702482022e+17,
934
  "train_batch_size": 8,
935
  "trial_name": null,
936
  "trial_params": null