alicegoesdown committed on
Commit
da5b168
·
verified ·
1 Parent(s): 20e6229

Training in progress, step 1350, checkpoint

Browse files
last-checkpoint/lora_top/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:544243a4ad2d25dac28345763c4b1a3a8c1739a2bc60868444f3034f5c58a1e6
3
  size 6299784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc3a67f4de1685ce6a0fced5c481b644514ade913e29a302672e9588e575aaad
3
  size 6299784
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d0e5b197894eaf041b41b2f6fb6f957116cc9a9b767ef8d77107f2a93d846965
3
  size 12623930
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c3361d74875a43ac74d1859dc2aa87429b4f648ae5c9304cdab88e987b01c49
3
  size 12623930
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5dc19b6e4aa6d96d209bc3cde10ac40343788cc8e21e98e8b0ad66316abe87b9
3
  size 14180
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d242322c5678470cae524c621709ef41118946651d32e327740afb650f163702
3
  size 14180
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:80737f34367bed0f31ec1eeecad8be2c3717a20421ad2a8f693a5747cb780b5c
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d682cfc5aa181fdf75f0f7c385234b0db148db5e71a3fbb7d749d518ba02734f
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 4.128114223480225,
3
- "best_model_checkpoint": "./output/checkpoint-1200",
4
- "epoch": 1.8518518518518519,
5
  "eval_steps": 150,
6
- "global_step": 1200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -911,6 +911,119 @@
911
  "eval_samples_per_second": 39.057,
912
  "eval_steps_per_second": 39.057,
913
  "step": 1200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
914
  }
915
  ],
916
  "logging_steps": 10,
@@ -930,7 +1043,7 @@
930
  "attributes": {}
931
  }
932
  },
933
- "total_flos": 4599042483142656.0,
934
  "train_batch_size": 16,
935
  "trial_name": null,
936
  "trial_params": null
 
1
  {
2
+ "best_metric": 4.089999198913574,
3
+ "best_model_checkpoint": "./output/checkpoint-1350",
4
+ "epoch": 2.0833333333333335,
5
  "eval_steps": 150,
6
+ "global_step": 1350,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
911
  "eval_samples_per_second": 39.057,
912
  "eval_steps_per_second": 39.057,
913
  "step": 1200
914
+ },
915
+ {
916
+ "epoch": 1.867283950617284,
917
+ "grad_norm": 3.017685651779175,
918
+ "learning_rate": 8.786371382380525e-05,
919
+ "loss": 4.0029,
920
+ "step": 1210
921
+ },
922
+ {
923
+ "epoch": 1.882716049382716,
924
+ "grad_norm": 3.601710081100464,
925
+ "learning_rate": 8.765357330018053e-05,
926
+ "loss": 3.9062,
927
+ "step": 1220
928
+ },
929
+ {
930
+ "epoch": 1.8981481481481481,
931
+ "grad_norm": 3.4925272464752197,
932
+ "learning_rate": 8.744188498563639e-05,
933
+ "loss": 3.8948,
934
+ "step": 1230
935
+ },
936
+ {
937
+ "epoch": 1.9135802469135803,
938
+ "grad_norm": 2.6413700580596924,
939
+ "learning_rate": 8.722865758185034e-05,
940
+ "loss": 4.0399,
941
+ "step": 1240
942
+ },
943
+ {
944
+ "epoch": 1.9290123456790123,
945
+ "grad_norm": 3.077667236328125,
946
+ "learning_rate": 8.701389985376575e-05,
947
+ "loss": 4.0094,
948
+ "step": 1250
949
+ },
950
+ {
951
+ "epoch": 1.9444444444444444,
952
+ "grad_norm": 3.1330454349517822,
953
+ "learning_rate": 8.679762062923174e-05,
954
+ "loss": 4.0173,
955
+ "step": 1260
956
+ },
957
+ {
958
+ "epoch": 1.9598765432098766,
959
+ "grad_norm": 2.7747910022735596,
960
+ "learning_rate": 8.657982879864005e-05,
961
+ "loss": 4.0111,
962
+ "step": 1270
963
+ },
964
+ {
965
+ "epoch": 1.9753086419753085,
966
+ "grad_norm": 2.432088851928711,
967
+ "learning_rate": 8.636053331455984e-05,
968
+ "loss": 4.0052,
969
+ "step": 1280
970
+ },
971
+ {
972
+ "epoch": 1.9907407407407407,
973
+ "grad_norm": 3.8459959030151367,
974
+ "learning_rate": 8.613974319136955e-05,
975
+ "loss": 3.9691,
976
+ "step": 1290
977
+ },
978
+ {
979
+ "epoch": 2.006172839506173,
980
+ "grad_norm": 3.38808536529541,
981
+ "learning_rate": 8.591746750488636e-05,
982
+ "loss": 3.8768,
983
+ "step": 1300
984
+ },
985
+ {
986
+ "epoch": 2.021604938271605,
987
+ "grad_norm": 4.70631742477417,
988
+ "learning_rate": 8.569371539199313e-05,
989
+ "loss": 3.8564,
990
+ "step": 1310
991
+ },
992
+ {
993
+ "epoch": 2.037037037037037,
994
+ "grad_norm": 2.96028208732605,
995
+ "learning_rate": 8.546849605026287e-05,
996
+ "loss": 3.913,
997
+ "step": 1320
998
+ },
999
+ {
1000
+ "epoch": 2.052469135802469,
1001
+ "grad_norm": 3.452777624130249,
1002
+ "learning_rate": 8.524181873758057e-05,
1003
+ "loss": 3.7249,
1004
+ "step": 1330
1005
+ },
1006
+ {
1007
+ "epoch": 2.067901234567901,
1008
+ "grad_norm": 4.151051044464111,
1009
+ "learning_rate": 8.501369277176273e-05,
1010
+ "loss": 3.8788,
1011
+ "step": 1340
1012
+ },
1013
+ {
1014
+ "epoch": 2.0833333333333335,
1015
+ "grad_norm": 3.2513532638549805,
1016
+ "learning_rate": 8.478412753017431e-05,
1017
+ "loss": 3.875,
1018
+ "step": 1350
1019
+ },
1020
+ {
1021
+ "epoch": 2.0833333333333335,
1022
+ "eval_loss": 4.089999198913574,
1023
+ "eval_runtime": 13.4234,
1024
+ "eval_samples_per_second": 37.248,
1025
+ "eval_steps_per_second": 37.248,
1026
+ "step": 1350
1027
  }
1028
  ],
1029
  "logging_steps": 10,
 
1043
  "attributes": {}
1044
  }
1045
  },
1046
+ "total_flos": 5153548116885504.0,
1047
  "train_batch_size": 16,
1048
  "trial_name": null,
1049
  "trial_params": null