besimray commited on
Commit
6ab2e1e
·
verified ·
1 Parent(s): 25cffe3

Training in progress, step 140, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:833eb01138783f900186e10d4e4abdce2331f847c08833bd0fa504402085a60b
3
  size 45118424
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89698065e8e2f09c47a2523945d02b0d700a6e7e9682f02876feb1c2df0b556e
3
  size 45118424
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be261bb291f9b4a5d55868833557a7b0f1e34d282ee624617485ad6c262a8639
3
  size 23159290
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c6bbb2ae050e5de748c89daebe89c76d78ce1d43502c409aef1191b1d589edd
3
  size 23159290
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d57cb21cc7fa77e1d0615ec039ca096efa9b134639fd673e38d6b4c2602fef16
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad8d45468f3e2058e4054b1300023f160078727b78fd8ca1d82d4d66de05c95e
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:61c2b4927e3039b26d377375be782c03ce853d193f96b5868ccf559441e84af9
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22a860b36e177d944f54f688d2f30a0a558f05a29fd2b863008a05217bc84467
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.07654841244220734,
3
  "best_model_checkpoint": "miner_id_besimray/checkpoint-120",
4
- "epoch": 1.5434083601286175,
5
  "eval_steps": 20,
6
- "global_step": 120,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -903,6 +903,154 @@
903
  "eval_samples_per_second": 25.413,
904
  "eval_steps_per_second": 2.634,
905
  "step": 120
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
906
  }
907
  ],
908
  "logging_steps": 1,
@@ -917,7 +1065,7 @@
917
  "early_stopping_threshold": 0.0
918
  },
919
  "attributes": {
920
- "early_stopping_patience_counter": 0
921
  }
922
  },
923
  "TrainerControl": {
@@ -931,7 +1079,7 @@
931
  "attributes": {}
932
  }
933
  },
934
- "total_flos": 2.858372738187264e+16,
935
  "train_batch_size": 10,
936
  "trial_name": null,
937
  "trial_params": null
 
1
  {
2
  "best_metric": 0.07654841244220734,
3
  "best_model_checkpoint": "miner_id_besimray/checkpoint-120",
4
+ "epoch": 1.8006430868167203,
5
  "eval_steps": 20,
6
+ "global_step": 140,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
903
  "eval_samples_per_second": 25.413,
904
  "eval_steps_per_second": 2.634,
905
  "step": 120
906
+ },
907
+ {
908
+ "epoch": 1.5562700964630225,
909
+ "grad_norm": 0.8864114284515381,
910
+ "learning_rate": 2.043754511182191e-05,
911
+ "loss": 0.0666,
912
+ "step": 121
913
+ },
914
+ {
915
+ "epoch": 1.5691318327974275,
916
+ "grad_norm": 0.679110050201416,
917
+ "learning_rate": 1.9098300562505266e-05,
918
+ "loss": 0.0519,
919
+ "step": 122
920
+ },
921
+ {
922
+ "epoch": 1.5819935691318328,
923
+ "grad_norm": 0.8264833688735962,
924
+ "learning_rate": 1.7799792455209018e-05,
925
+ "loss": 0.0535,
926
+ "step": 123
927
+ },
928
+ {
929
+ "epoch": 1.594855305466238,
930
+ "grad_norm": 0.8187565803527832,
931
+ "learning_rate": 1.6542674627869737e-05,
932
+ "loss": 0.0726,
933
+ "step": 124
934
+ },
935
+ {
936
+ "epoch": 1.607717041800643,
937
+ "grad_norm": 1.0040825605392456,
938
+ "learning_rate": 1.5327580077171587e-05,
939
+ "loss": 0.0547,
940
+ "step": 125
941
+ },
942
+ {
943
+ "epoch": 1.6205787781350482,
944
+ "grad_norm": 0.9699273109436035,
945
+ "learning_rate": 1.415512063981339e-05,
946
+ "loss": 0.0486,
947
+ "step": 126
948
+ },
949
+ {
950
+ "epoch": 1.6334405144694535,
951
+ "grad_norm": 0.9256762266159058,
952
+ "learning_rate": 1.3025886684430467e-05,
953
+ "loss": 0.0629,
954
+ "step": 127
955
+ },
956
+ {
957
+ "epoch": 1.6463022508038585,
958
+ "grad_norm": 1.14625084400177,
959
+ "learning_rate": 1.19404468143262e-05,
960
+ "loss": 0.1237,
961
+ "step": 128
962
+ },
963
+ {
964
+ "epoch": 1.6591639871382635,
965
+ "grad_norm": 0.9128087162971497,
966
+ "learning_rate": 1.0899347581163221e-05,
967
+ "loss": 0.0844,
968
+ "step": 129
969
+ },
970
+ {
971
+ "epoch": 1.6720257234726688,
972
+ "grad_norm": 0.6387249231338501,
973
+ "learning_rate": 9.903113209758096e-06,
974
+ "loss": 0.0356,
975
+ "step": 130
976
+ },
977
+ {
978
+ "epoch": 1.684887459807074,
979
+ "grad_norm": 0.8623852729797363,
980
+ "learning_rate": 8.952245334118414e-06,
981
+ "loss": 0.037,
982
+ "step": 131
983
+ },
984
+ {
985
+ "epoch": 1.697749196141479,
986
+ "grad_norm": 0.9539658427238464,
987
+ "learning_rate": 8.047222744854943e-06,
988
+ "loss": 0.0334,
989
+ "step": 132
990
+ },
991
+ {
992
+ "epoch": 1.7106109324758842,
993
+ "grad_norm": 1.2123513221740723,
994
+ "learning_rate": 7.1885011480961164e-06,
995
+ "loss": 0.0939,
996
+ "step": 133
997
+ },
998
+ {
999
+ "epoch": 1.7234726688102895,
1000
+ "grad_norm": 1.1446812152862549,
1001
+ "learning_rate": 6.37651293602628e-06,
1002
+ "loss": 0.0887,
1003
+ "step": 134
1004
+ },
1005
+ {
1006
+ "epoch": 1.7363344051446945,
1007
+ "grad_norm": 1.11151921749115,
1008
+ "learning_rate": 5.611666969163243e-06,
1009
+ "loss": 0.077,
1010
+ "step": 135
1011
+ },
1012
+ {
1013
+ "epoch": 1.7491961414790995,
1014
+ "grad_norm": 0.8716309070587158,
1015
+ "learning_rate": 4.8943483704846475e-06,
1016
+ "loss": 0.0522,
1017
+ "step": 136
1018
+ },
1019
+ {
1020
+ "epoch": 1.762057877813505,
1021
+ "grad_norm": 1.1028003692626953,
1022
+ "learning_rate": 4.224918331506955e-06,
1023
+ "loss": 0.0752,
1024
+ "step": 137
1025
+ },
1026
+ {
1027
+ "epoch": 1.77491961414791,
1028
+ "grad_norm": 0.5544919967651367,
1029
+ "learning_rate": 3.6037139304146762e-06,
1030
+ "loss": 0.025,
1031
+ "step": 138
1032
+ },
1033
+ {
1034
+ "epoch": 1.787781350482315,
1035
+ "grad_norm": 0.9546120762825012,
1036
+ "learning_rate": 3.0310479623313127e-06,
1037
+ "loss": 0.0636,
1038
+ "step": 139
1039
+ },
1040
+ {
1041
+ "epoch": 1.8006430868167203,
1042
+ "grad_norm": 0.6825479865074158,
1043
+ "learning_rate": 2.5072087818176382e-06,
1044
+ "loss": 0.0294,
1045
+ "step": 140
1046
+ },
1047
+ {
1048
+ "epoch": 1.8006430868167203,
1049
+ "eval_loss": 0.07656604796648026,
1050
+ "eval_runtime": 6.9163,
1051
+ "eval_samples_per_second": 23.712,
1052
+ "eval_steps_per_second": 2.458,
1053
+ "step": 140
1054
  }
1055
  ],
1056
  "logging_steps": 1,
 
1065
  "early_stopping_threshold": 0.0
1066
  },
1067
  "attributes": {
1068
+ "early_stopping_patience_counter": 1
1069
  }
1070
  },
1071
  "TrainerControl": {
 
1079
  "attributes": {}
1080
  }
1081
  },
1082
+ "total_flos": 3.32938145759232e+16,
1083
  "train_batch_size": 10,
1084
  "trial_name": null,
1085
  "trial_params": null