Training in progress, step 150, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 45118424
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a9ac5f431e68bc0be3e94d65a484f12f766b83dc919f1a2a7192b4fa03404081
|
| 3 |
size 45118424
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 23159290
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:464594457eec030889e13ff4a83de8b19dfb54d7712bd3f1c4259eaf0755261d
|
| 3 |
size 23159290
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4d11c7a6a55b1fb167809df49e86bfb5922b63262ff5a72c4acf98ce212bd8ed
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:194456d3c9e165255d5406a0f3f62973b0bede79d91784f72431350783e27ae7
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.7629328370094299,
|
| 3 |
"best_model_checkpoint": "miner_id_besimray/checkpoint-140",
|
| 4 |
-
"epoch": 1.
|
| 5 |
"eval_steps": 20,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -1051,6 +1051,76 @@
|
|
| 1051 |
"eval_samples_per_second": 23.385,
|
| 1052 |
"eval_steps_per_second": 2.424,
|
| 1053 |
"step": 140
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1054 |
}
|
| 1055 |
],
|
| 1056 |
"logging_steps": 1,
|
|
@@ -1074,12 +1144,12 @@
|
|
| 1074 |
"should_evaluate": false,
|
| 1075 |
"should_log": false,
|
| 1076 |
"should_save": true,
|
| 1077 |
-
"should_training_stop":
|
| 1078 |
},
|
| 1079 |
"attributes": {}
|
| 1080 |
}
|
| 1081 |
},
|
| 1082 |
-
"total_flos": 3.
|
| 1083 |
"train_batch_size": 10,
|
| 1084 |
"trial_name": null,
|
| 1085 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.7629328370094299,
|
| 3 |
"best_model_checkpoint": "miner_id_besimray/checkpoint-140",
|
| 4 |
+
"epoch": 1.9292604501607717,
|
| 5 |
"eval_steps": 20,
|
| 6 |
+
"global_step": 150,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 1051 |
"eval_samples_per_second": 23.385,
|
| 1052 |
"eval_steps_per_second": 2.424,
|
| 1053 |
"step": 140
|
| 1054 |
+
},
|
| 1055 |
+
{
|
| 1056 |
+
"epoch": 1.8135048231511255,
|
| 1057 |
+
"grad_norm": 0.2787846624851227,
|
| 1058 |
+
"learning_rate": 2.032460157676452e-06,
|
| 1059 |
+
"loss": 0.697,
|
| 1060 |
+
"step": 141
|
| 1061 |
+
},
|
| 1062 |
+
{
|
| 1063 |
+
"epoch": 1.8263665594855305,
|
| 1064 |
+
"grad_norm": 0.26291611790657043,
|
| 1065 |
+
"learning_rate": 1.6070411401370334e-06,
|
| 1066 |
+
"loss": 0.7866,
|
| 1067 |
+
"step": 142
|
| 1068 |
+
},
|
| 1069 |
+
{
|
| 1070 |
+
"epoch": 1.8392282958199357,
|
| 1071 |
+
"grad_norm": 0.22387652099132538,
|
| 1072 |
+
"learning_rate": 1.231165940486234e-06,
|
| 1073 |
+
"loss": 0.6242,
|
| 1074 |
+
"step": 143
|
| 1075 |
+
},
|
| 1076 |
+
{
|
| 1077 |
+
"epoch": 1.852090032154341,
|
| 1078 |
+
"grad_norm": 0.25877615809440613,
|
| 1079 |
+
"learning_rate": 9.0502382320653e-07,
|
| 1080 |
+
"loss": 0.7036,
|
| 1081 |
+
"step": 144
|
| 1082 |
+
},
|
| 1083 |
+
{
|
| 1084 |
+
"epoch": 1.864951768488746,
|
| 1085 |
+
"grad_norm": 0.24913811683654785,
|
| 1086 |
+
"learning_rate": 6.287790106757396e-07,
|
| 1087 |
+
"loss": 0.7355,
|
| 1088 |
+
"step": 145
|
| 1089 |
+
},
|
| 1090 |
+
{
|
| 1091 |
+
"epoch": 1.877813504823151,
|
| 1092 |
+
"grad_norm": 0.22730277478694916,
|
| 1093 |
+
"learning_rate": 4.025706004760932e-07,
|
| 1094 |
+
"loss": 0.6122,
|
| 1095 |
+
"step": 146
|
| 1096 |
+
},
|
| 1097 |
+
{
|
| 1098 |
+
"epoch": 1.8906752411575563,
|
| 1099 |
+
"grad_norm": 0.2412744015455246,
|
| 1100 |
+
"learning_rate": 2.265124953543918e-07,
|
| 1101 |
+
"loss": 0.7369,
|
| 1102 |
+
"step": 147
|
| 1103 |
+
},
|
| 1104 |
+
{
|
| 1105 |
+
"epoch": 1.9035369774919615,
|
| 1106 |
+
"grad_norm": 0.23351891338825226,
|
| 1107 |
+
"learning_rate": 1.0069334586854107e-07,
|
| 1108 |
+
"loss": 0.6705,
|
| 1109 |
+
"step": 148
|
| 1110 |
+
},
|
| 1111 |
+
{
|
| 1112 |
+
"epoch": 1.9163987138263665,
|
| 1113 |
+
"grad_norm": 0.2615411877632141,
|
| 1114 |
+
"learning_rate": 2.5176505749346936e-08,
|
| 1115 |
+
"loss": 0.7548,
|
| 1116 |
+
"step": 149
|
| 1117 |
+
},
|
| 1118 |
+
{
|
| 1119 |
+
"epoch": 1.9292604501607717,
|
| 1120 |
+
"grad_norm": 0.29780200123786926,
|
| 1121 |
+
"learning_rate": 0.0,
|
| 1122 |
+
"loss": 0.8051,
|
| 1123 |
+
"step": 150
|
| 1124 |
}
|
| 1125 |
],
|
| 1126 |
"logging_steps": 1,
|
|
|
|
| 1144 |
"should_evaluate": false,
|
| 1145 |
"should_log": false,
|
| 1146 |
"should_save": true,
|
| 1147 |
+
"should_training_stop": true
|
| 1148 |
},
|
| 1149 |
"attributes": {}
|
| 1150 |
}
|
| 1151 |
},
|
| 1152 |
+
"total_flos": 3.58265259884544e+16,
|
| 1153 |
"train_batch_size": 10,
|
| 1154 |
"trial_name": null,
|
| 1155 |
"trial_params": null
|