besimray commited on
Commit
ff41647
·
verified ·
1 Parent(s): f289423

Training in progress, step 140, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cb419f5181ec8da0ae8397cbfdb62b04e49fa9a08a7101d937c933c3c4e9b5b5
3
  size 45118424
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57f7b5b1793e66b4e6395cd45d0b7bf1995df458d6793623a7bd9e2c0d0b928b
3
  size 45118424
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d6d1ba9960dfe9f6fb31574a7377c97b9d1778e61daac9aa92ca708534f38b3a
3
  size 23159290
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5e18ad39041526daf799ac3577064398b7fedac5b63062fe0b42b57a50f2bec
3
  size 23159290
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f9fd0216f0d6187ef1dc8511dbb510792521c2906a3f52d90a5f66d2aa05fa7a
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e59ab75e1b6101343865e80667c2c665b321f766aee719369a3ca5a3689ff09
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:61c2b4927e3039b26d377375be782c03ce853d193f96b5868ccf559441e84af9
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22a860b36e177d944f54f688d2f30a0a558f05a29fd2b863008a05217bc84467
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.764509916305542,
3
- "best_model_checkpoint": "miner_id_besimray/checkpoint-120",
4
- "epoch": 1.5434083601286175,
5
  "eval_steps": 20,
6
- "global_step": 120,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -903,6 +903,154 @@
903
  "eval_samples_per_second": 24.097,
904
  "eval_steps_per_second": 2.498,
905
  "step": 120
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
906
  }
907
  ],
908
  "logging_steps": 1,
@@ -931,7 +1079,7 @@
931
  "attributes": {}
932
  }
933
  },
934
- "total_flos": 2.858372738187264e+16,
935
  "train_batch_size": 10,
936
  "trial_name": null,
937
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.7633899450302124,
3
+ "best_model_checkpoint": "miner_id_besimray/checkpoint-140",
4
+ "epoch": 1.8006430868167203,
5
  "eval_steps": 20,
6
+ "global_step": 140,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
903
  "eval_samples_per_second": 24.097,
904
  "eval_steps_per_second": 2.498,
905
  "step": 120
906
+ },
907
+ {
908
+ "epoch": 1.5562700964630225,
909
+ "grad_norm": 0.288256973028183,
910
+ "learning_rate": 2.043754511182191e-05,
911
+ "loss": 0.8183,
912
+ "step": 121
913
+ },
914
+ {
915
+ "epoch": 1.5691318327974275,
916
+ "grad_norm": 0.254046231508255,
917
+ "learning_rate": 1.9098300562505266e-05,
918
+ "loss": 0.6721,
919
+ "step": 122
920
+ },
921
+ {
922
+ "epoch": 1.5819935691318328,
923
+ "grad_norm": 0.24051472544670105,
924
+ "learning_rate": 1.7799792455209018e-05,
925
+ "loss": 0.6993,
926
+ "step": 123
927
+ },
928
+ {
929
+ "epoch": 1.594855305466238,
930
+ "grad_norm": 0.2690548002719879,
931
+ "learning_rate": 1.6542674627869737e-05,
932
+ "loss": 0.7357,
933
+ "step": 124
934
+ },
935
+ {
936
+ "epoch": 1.607717041800643,
937
+ "grad_norm": 0.2249222695827484,
938
+ "learning_rate": 1.5327580077171587e-05,
939
+ "loss": 0.6112,
940
+ "step": 125
941
+ },
942
+ {
943
+ "epoch": 1.6205787781350482,
944
+ "grad_norm": 0.2525765597820282,
945
+ "learning_rate": 1.415512063981339e-05,
946
+ "loss": 0.7281,
947
+ "step": 126
948
+ },
949
+ {
950
+ "epoch": 1.6334405144694535,
951
+ "grad_norm": 0.2448454648256302,
952
+ "learning_rate": 1.3025886684430467e-05,
953
+ "loss": 0.6699,
954
+ "step": 127
955
+ },
956
+ {
957
+ "epoch": 1.6463022508038585,
958
+ "grad_norm": 0.27227962017059326,
959
+ "learning_rate": 1.19404468143262e-05,
960
+ "loss": 0.7431,
961
+ "step": 128
962
+ },
963
+ {
964
+ "epoch": 1.6591639871382635,
965
+ "grad_norm": 0.26319149136543274,
966
+ "learning_rate": 1.0899347581163221e-05,
967
+ "loss": 0.6885,
968
+ "step": 129
969
+ },
970
+ {
971
+ "epoch": 1.6720257234726688,
972
+ "grad_norm": 0.2802058160305023,
973
+ "learning_rate": 9.903113209758096e-06,
974
+ "loss": 0.7451,
975
+ "step": 130
976
+ },
977
+ {
978
+ "epoch": 1.684887459807074,
979
+ "grad_norm": 0.23295214772224426,
980
+ "learning_rate": 8.952245334118414e-06,
981
+ "loss": 0.6393,
982
+ "step": 131
983
+ },
984
+ {
985
+ "epoch": 1.697749196141479,
986
+ "grad_norm": 0.2382490485906601,
987
+ "learning_rate": 8.047222744854943e-06,
988
+ "loss": 0.621,
989
+ "step": 132
990
+ },
991
+ {
992
+ "epoch": 1.7106109324758842,
993
+ "grad_norm": 0.26903268694877625,
994
+ "learning_rate": 7.1885011480961164e-06,
995
+ "loss": 0.8464,
996
+ "step": 133
997
+ },
998
+ {
999
+ "epoch": 1.7234726688102895,
1000
+ "grad_norm": 0.22437304258346558,
1001
+ "learning_rate": 6.37651293602628e-06,
1002
+ "loss": 0.526,
1003
+ "step": 134
1004
+ },
1005
+ {
1006
+ "epoch": 1.7363344051446945,
1007
+ "grad_norm": 0.2693169414997101,
1008
+ "learning_rate": 5.611666969163243e-06,
1009
+ "loss": 0.7231,
1010
+ "step": 135
1011
+ },
1012
+ {
1013
+ "epoch": 1.7491961414790995,
1014
+ "grad_norm": 0.24426168203353882,
1015
+ "learning_rate": 4.8943483704846475e-06,
1016
+ "loss": 0.6771,
1017
+ "step": 136
1018
+ },
1019
+ {
1020
+ "epoch": 1.762057877813505,
1021
+ "grad_norm": 0.24735158681869507,
1022
+ "learning_rate": 4.224918331506955e-06,
1023
+ "loss": 0.7126,
1024
+ "step": 137
1025
+ },
1026
+ {
1027
+ "epoch": 1.77491961414791,
1028
+ "grad_norm": 0.2898198962211609,
1029
+ "learning_rate": 3.6037139304146762e-06,
1030
+ "loss": 0.838,
1031
+ "step": 138
1032
+ },
1033
+ {
1034
+ "epoch": 1.787781350482315,
1035
+ "grad_norm": 0.22501428425312042,
1036
+ "learning_rate": 3.0310479623313127e-06,
1037
+ "loss": 0.6003,
1038
+ "step": 139
1039
+ },
1040
+ {
1041
+ "epoch": 1.8006430868167203,
1042
+ "grad_norm": 0.2505525052547455,
1043
+ "learning_rate": 2.5072087818176382e-06,
1044
+ "loss": 0.6793,
1045
+ "step": 140
1046
+ },
1047
+ {
1048
+ "epoch": 1.8006430868167203,
1049
+ "eval_loss": 0.7633899450302124,
1050
+ "eval_runtime": 6.8979,
1051
+ "eval_samples_per_second": 23.775,
1052
+ "eval_steps_per_second": 2.465,
1053
+ "step": 140
1054
  }
1055
  ],
1056
  "logging_steps": 1,
 
1079
  "attributes": {}
1080
  }
1081
  },
1082
+ "total_flos": 3.32938145759232e+16,
1083
  "train_batch_size": 10,
1084
  "trial_name": null,
1085
  "trial_params": null