besimray committed on
Commit
8257b18
·
verified ·
1 Parent(s): 8dc8eb5

Training in progress, step 140, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:85fd3349fb0b6d9fe33609fd627bbfa198712a033c54895c31d1954fff998d2f
3
  size 45118424
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8f124971f6e45e066092c45814571663abeeb1c7c37d9b8a60eedfc0a06e172
3
  size 45118424
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2ef667d6928d98b9252d546a69290d43eb4b3268ece09ea8d4b8985820baaa05
3
  size 23159290
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fe17862cdc22820617ba77c353485eff09dcb153edbcbcced750cf2df5f273b
3
  size 23159290
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f9fd0216f0d6187ef1dc8511dbb510792521c2906a3f52d90a5f66d2aa05fa7a
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e59ab75e1b6101343865e80667c2c665b321f766aee719369a3ca5a3689ff09
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:61c2b4927e3039b26d377375be782c03ce853d193f96b5868ccf559441e84af9
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22a860b36e177d944f54f688d2f30a0a558f05a29fd2b863008a05217bc84467
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.7635705471038818,
3
- "best_model_checkpoint": "miner_id_besimray/checkpoint-120",
4
- "epoch": 1.5434083601286175,
5
  "eval_steps": 20,
6
- "global_step": 120,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -903,6 +903,154 @@
903
  "eval_samples_per_second": 28.406,
904
  "eval_steps_per_second": 2.945,
905
  "step": 120
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
906
  }
907
  ],
908
  "logging_steps": 1,
@@ -931,7 +1079,7 @@
931
  "attributes": {}
932
  }
933
  },
934
- "total_flos": 2.858372738187264e+16,
935
  "train_batch_size": 10,
936
  "trial_name": null,
937
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.7629328370094299,
3
+ "best_model_checkpoint": "miner_id_besimray/checkpoint-140",
4
+ "epoch": 1.8006430868167203,
5
  "eval_steps": 20,
6
+ "global_step": 140,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
903
  "eval_samples_per_second": 28.406,
904
  "eval_steps_per_second": 2.945,
905
  "step": 120
906
+ },
907
+ {
908
+ "epoch": 1.5562700964630225,
909
+ "grad_norm": 0.2886348068714142,
910
+ "learning_rate": 2.043754511182191e-05,
911
+ "loss": 0.8158,
912
+ "step": 121
913
+ },
914
+ {
915
+ "epoch": 1.5691318327974275,
916
+ "grad_norm": 0.2530727982521057,
917
+ "learning_rate": 1.9098300562505266e-05,
918
+ "loss": 0.6713,
919
+ "step": 122
920
+ },
921
+ {
922
+ "epoch": 1.5819935691318328,
923
+ "grad_norm": 0.24064087867736816,
924
+ "learning_rate": 1.7799792455209018e-05,
925
+ "loss": 0.6981,
926
+ "step": 123
927
+ },
928
+ {
929
+ "epoch": 1.594855305466238,
930
+ "grad_norm": 0.2673289477825165,
931
+ "learning_rate": 1.6542674627869737e-05,
932
+ "loss": 0.7346,
933
+ "step": 124
934
+ },
935
+ {
936
+ "epoch": 1.607717041800643,
937
+ "grad_norm": 0.22544747591018677,
938
+ "learning_rate": 1.5327580077171587e-05,
939
+ "loss": 0.6114,
940
+ "step": 125
941
+ },
942
+ {
943
+ "epoch": 1.6205787781350482,
944
+ "grad_norm": 0.2511521577835083,
945
+ "learning_rate": 1.415512063981339e-05,
946
+ "loss": 0.7274,
947
+ "step": 126
948
+ },
949
+ {
950
+ "epoch": 1.6334405144694535,
951
+ "grad_norm": 0.2477598786354065,
952
+ "learning_rate": 1.3025886684430467e-05,
953
+ "loss": 0.6706,
954
+ "step": 127
955
+ },
956
+ {
957
+ "epoch": 1.6463022508038585,
958
+ "grad_norm": 0.2672538161277771,
959
+ "learning_rate": 1.19404468143262e-05,
960
+ "loss": 0.7443,
961
+ "step": 128
962
+ },
963
+ {
964
+ "epoch": 1.6591639871382635,
965
+ "grad_norm": 0.26399627327919006,
966
+ "learning_rate": 1.0899347581163221e-05,
967
+ "loss": 0.6866,
968
+ "step": 129
969
+ },
970
+ {
971
+ "epoch": 1.6720257234726688,
972
+ "grad_norm": 0.2790736258029938,
973
+ "learning_rate": 9.903113209758096e-06,
974
+ "loss": 0.7447,
975
+ "step": 130
976
+ },
977
+ {
978
+ "epoch": 1.684887459807074,
979
+ "grad_norm": 0.23339378833770752,
980
+ "learning_rate": 8.952245334118414e-06,
981
+ "loss": 0.6387,
982
+ "step": 131
983
+ },
984
+ {
985
+ "epoch": 1.697749196141479,
986
+ "grad_norm": 0.23756496608257294,
987
+ "learning_rate": 8.047222744854943e-06,
988
+ "loss": 0.6194,
989
+ "step": 132
990
+ },
991
+ {
992
+ "epoch": 1.7106109324758842,
993
+ "grad_norm": 0.27553918957710266,
994
+ "learning_rate": 7.1885011480961164e-06,
995
+ "loss": 0.848,
996
+ "step": 133
997
+ },
998
+ {
999
+ "epoch": 1.7234726688102895,
1000
+ "grad_norm": 0.2218138724565506,
1001
+ "learning_rate": 6.37651293602628e-06,
1002
+ "loss": 0.5259,
1003
+ "step": 134
1004
+ },
1005
+ {
1006
+ "epoch": 1.7363344051446945,
1007
+ "grad_norm": 0.2685801386833191,
1008
+ "learning_rate": 5.611666969163243e-06,
1009
+ "loss": 0.7221,
1010
+ "step": 135
1011
+ },
1012
+ {
1013
+ "epoch": 1.7491961414790995,
1014
+ "grad_norm": 0.24097266793251038,
1015
+ "learning_rate": 4.8943483704846475e-06,
1016
+ "loss": 0.6776,
1017
+ "step": 136
1018
+ },
1019
+ {
1020
+ "epoch": 1.762057877813505,
1021
+ "grad_norm": 0.24491430819034576,
1022
+ "learning_rate": 4.224918331506955e-06,
1023
+ "loss": 0.7152,
1024
+ "step": 137
1025
+ },
1026
+ {
1027
+ "epoch": 1.77491961414791,
1028
+ "grad_norm": 0.28753861784935,
1029
+ "learning_rate": 3.6037139304146762e-06,
1030
+ "loss": 0.8375,
1031
+ "step": 138
1032
+ },
1033
+ {
1034
+ "epoch": 1.787781350482315,
1035
+ "grad_norm": 0.2249891459941864,
1036
+ "learning_rate": 3.0310479623313127e-06,
1037
+ "loss": 0.6006,
1038
+ "step": 139
1039
+ },
1040
+ {
1041
+ "epoch": 1.8006430868167203,
1042
+ "grad_norm": 0.24978116154670715,
1043
+ "learning_rate": 2.5072087818176382e-06,
1044
+ "loss": 0.6807,
1045
+ "step": 140
1046
+ },
1047
+ {
1048
+ "epoch": 1.8006430868167203,
1049
+ "eval_loss": 0.7629328370094299,
1050
+ "eval_runtime": 7.0132,
1051
+ "eval_samples_per_second": 23.385,
1052
+ "eval_steps_per_second": 2.424,
1053
+ "step": 140
1054
  }
1055
  ],
1056
  "logging_steps": 1,
 
1079
  "attributes": {}
1080
  }
1081
  },
1082
+ "total_flos": 3.32938145759232e+16,
1083
  "train_batch_size": 10,
1084
  "trial_name": null,
1085
  "trial_params": null