abaddon182 commited on
Commit
ce681a1
·
verified ·
1 Parent(s): 0aaea48

Training in progress, step 1500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:28ee51930983bd5783773a2667d05243c7bf04209d2af78ec03cb00c4518ab18
3
  size 17425352
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8375b215361fcd084be6cb51f159ff41f69e660f7482fadac03c670e43bd6a62
3
  size 17425352
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:339a2885a27a78c8922945c6793e30d8c73a3d30920de4771d5d81652f078020
3
  size 10252116
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c014e6efa0fddde31c3d035dc3ae148865deaf617d38ec91a6f7c15099b32fe3
3
  size 10252116
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e4c69cf57d25409b07a793754179ed99c623c866f3c475e6cf4023b90af9758
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b87e05ea1a538a9d6373d03def254fde5a1793a5117b3a1e8f187407516a5b5
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df814464c49d6114ced0cb2649f86ff3e3c60dbf90905c8abb1f6369d1225fb1
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7592e941fa77449d9f7015724b6a3901e596c11e155596880b96def1f284283
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 1.7133162021636963,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-1200",
4
- "epoch": 0.4083368779242875,
5
  "eval_steps": 300,
6
- "global_step": 1200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -887,6 +887,224 @@
887
  "eval_samples_per_second": 37.197,
888
  "eval_steps_per_second": 18.599,
889
  "step": 1200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
890
  }
891
  ],
892
  "logging_steps": 10,
@@ -910,12 +1128,12 @@
910
  "should_evaluate": false,
911
  "should_log": false,
912
  "should_save": true,
913
- "should_training_stop": false
914
  },
915
  "attributes": {}
916
  }
917
  },
918
- "total_flos": 6.594316339249152e+16,
919
  "train_batch_size": 2,
920
  "trial_name": null,
921
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.7095513343811035,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-1500",
4
+ "epoch": 0.5104210974053595,
5
  "eval_steps": 300,
6
+ "global_step": 1500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
887
  "eval_samples_per_second": 37.197,
888
  "eval_steps_per_second": 18.599,
889
  "step": 1200
890
+ },
891
+ {
892
+ "epoch": 0.4117396852403233,
893
+ "grad_norm": 0.2934724688529968,
894
+ "learning_rate": 4.7745751406263165e-06,
895
+ "loss": 1.6398,
896
+ "step": 1210
897
+ },
898
+ {
899
+ "epoch": 0.415142492556359,
900
+ "grad_norm": 0.28240928053855896,
901
+ "learning_rate": 4.460970818476718e-06,
902
+ "loss": 1.3729,
903
+ "step": 1220
904
+ },
905
+ {
906
+ "epoch": 0.41854529987239475,
907
+ "grad_norm": 0.3319728374481201,
908
+ "learning_rate": 4.15700759802175e-06,
909
+ "loss": 1.3721,
910
+ "step": 1230
911
+ },
912
+ {
913
+ "epoch": 0.42194810718843045,
914
+ "grad_norm": 0.33748745918273926,
915
+ "learning_rate": 3.862828160801707e-06,
916
+ "loss": 2.11,
917
+ "step": 1240
918
+ },
919
+ {
920
+ "epoch": 0.42535091450446616,
921
+ "grad_norm": 0.6705477833747864,
922
+ "learning_rate": 3.578570595810274e-06,
923
+ "loss": 2.0526,
924
+ "step": 1250
925
+ },
926
+ {
927
+ "epoch": 0.4287537218205019,
928
+ "grad_norm": 0.2961376905441284,
929
+ "learning_rate": 3.3043683346749647e-06,
930
+ "loss": 1.6024,
931
+ "step": 1260
932
+ },
933
+ {
934
+ "epoch": 0.4321565291365376,
935
+ "grad_norm": 0.31039202213287354,
936
+ "learning_rate": 3.040350089023844e-06,
937
+ "loss": 1.395,
938
+ "step": 1270
939
+ },
940
+ {
941
+ "epoch": 0.4355593364525734,
942
+ "grad_norm": 0.35287657380104065,
943
+ "learning_rate": 2.786639790067719e-06,
944
+ "loss": 1.4204,
945
+ "step": 1280
946
+ },
947
+ {
948
+ "epoch": 0.4389621437686091,
949
+ "grad_norm": 0.3106899857521057,
950
+ "learning_rate": 2.543356530426394e-06,
951
+ "loss": 2.0329,
952
+ "step": 1290
953
+ },
954
+ {
955
+ "epoch": 0.44236495108464485,
956
+ "grad_norm": 0.7246268391609192,
957
+ "learning_rate": 2.310614508226078e-06,
958
+ "loss": 2.1024,
959
+ "step": 1300
960
+ },
961
+ {
962
+ "epoch": 0.44576775840068056,
963
+ "grad_norm": 0.2955819070339203,
964
+ "learning_rate": 2.0885229734943502e-06,
965
+ "loss": 1.6619,
966
+ "step": 1310
967
+ },
968
+ {
969
+ "epoch": 0.44917056571671626,
970
+ "grad_norm": 0.29394975304603577,
971
+ "learning_rate": 1.8771861768777792e-06,
972
+ "loss": 1.4059,
973
+ "step": 1320
974
+ },
975
+ {
976
+ "epoch": 0.452573373032752,
977
+ "grad_norm": 0.3255048394203186,
978
+ "learning_rate": 1.67670332070623e-06,
979
+ "loss": 1.4292,
980
+ "step": 1330
981
+ },
982
+ {
983
+ "epoch": 0.45597618034878773,
984
+ "grad_norm": 0.3254324197769165,
985
+ "learning_rate": 1.4871685124269008e-06,
986
+ "loss": 2.1335,
987
+ "step": 1340
988
+ },
989
+ {
990
+ "epoch": 0.4593789876648235,
991
+ "grad_norm": 0.6185155510902405,
992
+ "learning_rate": 1.3086707204299414e-06,
993
+ "loss": 2.1161,
994
+ "step": 1350
995
+ },
996
+ {
997
+ "epoch": 0.4627817949808592,
998
+ "grad_norm": 0.29304781556129456,
999
+ "learning_rate": 1.141293732286297e-06,
1000
+ "loss": 1.6244,
1001
+ "step": 1360
1002
+ },
1003
+ {
1004
+ "epoch": 0.46618460229689496,
1005
+ "grad_norm": 0.28472885489463806,
1006
+ "learning_rate": 9.851161154175337e-07,
1007
+ "loss": 1.3787,
1008
+ "step": 1370
1009
+ },
1010
+ {
1011
+ "epoch": 0.46958740961293066,
1012
+ "grad_norm": 0.33770933747291565,
1013
+ "learning_rate": 8.402111802159412e-07,
1014
+ "loss": 1.4082,
1015
+ "step": 1380
1016
+ },
1017
+ {
1018
+ "epoch": 0.4729902169289664,
1019
+ "grad_norm": 0.3725377023220062,
1020
+ "learning_rate": 7.06646945632361e-07,
1021
+ "loss": 2.076,
1022
+ "step": 1390
1023
+ },
1024
+ {
1025
+ "epoch": 0.47639302424500213,
1026
+ "grad_norm": 0.6141946911811829,
1027
+ "learning_rate": 5.844861072478336e-07,
1028
+ "loss": 2.0871,
1029
+ "step": 1400
1030
+ },
1031
+ {
1032
+ "epoch": 0.47979583156103783,
1033
+ "grad_norm": 0.2772093713283539,
1034
+ "learning_rate": 4.7378600784402093e-07,
1035
+ "loss": 1.6158,
1036
+ "step": 1410
1037
+ },
1038
+ {
1039
+ "epoch": 0.4831986388770736,
1040
+ "grad_norm": 0.2923103868961334,
1041
+ "learning_rate": 3.745986104862903e-07,
1042
+ "loss": 1.4024,
1043
+ "step": 1420
1044
+ },
1045
+ {
1046
+ "epoch": 0.4866014461931093,
1047
+ "grad_norm": 0.33724531531333923,
1048
+ "learning_rate": 2.869704741320478e-07,
1049
+ "loss": 1.417,
1050
+ "step": 1430
1051
+ },
1052
+ {
1053
+ "epoch": 0.49000425350914506,
1054
+ "grad_norm": 0.3163939118385315,
1055
+ "learning_rate": 2.1094273177576507e-07,
1056
+ "loss": 2.1322,
1057
+ "step": 1440
1058
+ },
1059
+ {
1060
+ "epoch": 0.49340706082518077,
1061
+ "grad_norm": 0.6630510091781616,
1062
+ "learning_rate": 1.4655107114101007e-07,
1063
+ "loss": 2.0956,
1064
+ "step": 1450
1065
+ },
1066
+ {
1067
+ "epoch": 0.4968098681412165,
1068
+ "grad_norm": 0.29451075196266174,
1069
+ "learning_rate": 9.382571792846961e-08,
1070
+ "loss": 1.6392,
1071
+ "step": 1460
1072
+ },
1073
+ {
1074
+ "epoch": 0.5002126754572522,
1075
+ "grad_norm": 0.29572850465774536,
1076
+ "learning_rate": 5.279142162789019e-08,
1077
+ "loss": 1.4153,
1078
+ "step": 1470
1079
+ },
1080
+ {
1081
+ "epoch": 0.5036154827732879,
1082
+ "grad_norm": 0.3492263853549957,
1083
+ "learning_rate": 2.3467443900582198e-08,
1084
+ "loss": 1.3996,
1085
+ "step": 1480
1086
+ },
1087
+ {
1088
+ "epoch": 0.5070182900893236,
1089
+ "grad_norm": 0.3328089714050293,
1090
+ "learning_rate": 5.86754953789681e-09,
1091
+ "loss": 2.0623,
1092
+ "step": 1490
1093
+ },
1094
+ {
1095
+ "epoch": 0.5104210974053595,
1096
+ "grad_norm": 0.6787052750587463,
1097
+ "learning_rate": 0.0,
1098
+ "loss": 2.1523,
1099
+ "step": 1500
1100
+ },
1101
+ {
1102
+ "epoch": 0.5104210974053595,
1103
+ "eval_loss": 1.7095513343811035,
1104
+ "eval_runtime": 132.0071,
1105
+ "eval_samples_per_second": 37.498,
1106
+ "eval_steps_per_second": 18.749,
1107
+ "step": 1500
1108
  }
1109
  ],
1110
  "logging_steps": 10,
 
1128
  "should_evaluate": false,
1129
  "should_log": false,
1130
  "should_save": true,
1131
+ "should_training_stop": true
1132
  },
1133
  "attributes": {}
1134
  }
1135
  },
1136
+ "total_flos": 8.239759752953856e+16,
1137
  "train_batch_size": 2,
1138
  "trial_name": null,
1139
  "trial_params": null