Training in progress, step 1500, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 528526760
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ff35e84e01f1386ebac587ae0cb8f62443ce9bfbcaf664da61c9fb02006570c0
|
| 3 |
size 528526760
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 141172038
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6e22ca59118b10b5eb52fb341cd120c1968f04534206e934f766d9a6e8dec217
|
| 3 |
size 141172038
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e3048ecfad89600ff1c80bedbf89688a52e92cfd26e80e4cb6be8f6aab7986d5
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 2080
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:53869905448aa5dcb85f704ccd358f2990095cda7a0cb40de61a3fe333d56cc8
|
| 3 |
size 2080
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 2.450434923171997,
|
| 3 |
"best_model_checkpoint": "miner_id_24/checkpoint-1200",
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 100,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -1107,6 +1107,84 @@
|
|
| 1107 |
"eval_samples_per_second": 4.015,
|
| 1108 |
"eval_steps_per_second": 2.017,
|
| 1109 |
"step": 1400
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1110 |
}
|
| 1111 |
],
|
| 1112 |
"logging_steps": 10,
|
|
@@ -1121,7 +1199,7 @@
|
|
| 1121 |
"early_stopping_threshold": 0.0
|
| 1122 |
},
|
| 1123 |
"attributes": {
|
| 1124 |
-
"early_stopping_patience_counter":
|
| 1125 |
}
|
| 1126 |
},
|
| 1127 |
"TrainerControl": {
|
|
@@ -1130,12 +1208,12 @@
|
|
| 1130 |
"should_evaluate": false,
|
| 1131 |
"should_log": false,
|
| 1132 |
"should_save": true,
|
| 1133 |
-
"should_training_stop":
|
| 1134 |
},
|
| 1135 |
"attributes": {}
|
| 1136 |
}
|
| 1137 |
},
|
| 1138 |
-
"total_flos": 2.
|
| 1139 |
"train_batch_size": 2,
|
| 1140 |
"trial_name": null,
|
| 1141 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 2.450434923171997,
|
| 3 |
"best_model_checkpoint": "miner_id_24/checkpoint-1200",
|
| 4 |
+
"epoch": 0.05819536182966217,
|
| 5 |
"eval_steps": 100,
|
| 6 |
+
"global_step": 1500,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 1107 |
"eval_samples_per_second": 4.015,
|
| 1108 |
"eval_steps_per_second": 2.017,
|
| 1109 |
"step": 1400
|
| 1110 |
+
},
|
| 1111 |
+
{
|
| 1112 |
+
"epoch": 0.05470364011988244,
|
| 1113 |
+
"grad_norm": 4.387486934661865,
|
| 1114 |
+
"learning_rate": 0.00019984273239970207,
|
| 1115 |
+
"loss": 10.1819,
|
| 1116 |
+
"step": 1410
|
| 1117 |
+
},
|
| 1118 |
+
{
|
| 1119 |
+
"epoch": 0.05509160919874686,
|
| 1120 |
+
"grad_norm": 5.688685894012451,
|
| 1121 |
+
"learning_rate": 0.00019984044774901122,
|
| 1122 |
+
"loss": 8.8118,
|
| 1123 |
+
"step": 1420
|
| 1124 |
+
},
|
| 1125 |
+
{
|
| 1126 |
+
"epoch": 0.055479578277611276,
|
| 1127 |
+
"grad_norm": 5.5436835289001465,
|
| 1128 |
+
"learning_rate": 0.00019983814854640514,
|
| 1129 |
+
"loss": 9.8809,
|
| 1130 |
+
"step": 1430
|
| 1131 |
+
},
|
| 1132 |
+
{
|
| 1133 |
+
"epoch": 0.055867547356475686,
|
| 1134 |
+
"grad_norm": 6.585811614990234,
|
| 1135 |
+
"learning_rate": 0.00019983583479188383,
|
| 1136 |
+
"loss": 9.4094,
|
| 1137 |
+
"step": 1440
|
| 1138 |
+
},
|
| 1139 |
+
{
|
| 1140 |
+
"epoch": 0.0562555164353401,
|
| 1141 |
+
"grad_norm": 9.564101219177246,
|
| 1142 |
+
"learning_rate": 0.00019983349193353206,
|
| 1143 |
+
"loss": 10.8473,
|
| 1144 |
+
"step": 1450
|
| 1145 |
+
},
|
| 1146 |
+
{
|
| 1147 |
+
"epoch": 0.05664348551420452,
|
| 1148 |
+
"grad_norm": 3.8079373836517334,
|
| 1149 |
+
"learning_rate": 0.00019983113452326506,
|
| 1150 |
+
"loss": 9.3833,
|
| 1151 |
+
"step": 1460
|
| 1152 |
+
},
|
| 1153 |
+
{
|
| 1154 |
+
"epoch": 0.05703145459306893,
|
| 1155 |
+
"grad_norm": 4.716315269470215,
|
| 1156 |
+
"learning_rate": 0.00019982877711299807,
|
| 1157 |
+
"loss": 9.0148,
|
| 1158 |
+
"step": 1470
|
| 1159 |
+
},
|
| 1160 |
+
{
|
| 1161 |
+
"epoch": 0.057419423671933346,
|
| 1162 |
+
"grad_norm": 5.183149337768555,
|
| 1163 |
+
"learning_rate": 0.0001998263760469854,
|
| 1164 |
+
"loss": 10.1636,
|
| 1165 |
+
"step": 1480
|
| 1166 |
+
},
|
| 1167 |
+
{
|
| 1168 |
+
"epoch": 0.05780739275079776,
|
| 1169 |
+
"grad_norm": 7.416577339172363,
|
| 1170 |
+
"learning_rate": 0.00019982398953288794,
|
| 1171 |
+
"loss": 10.1869,
|
| 1172 |
+
"step": 1490
|
| 1173 |
+
},
|
| 1174 |
+
{
|
| 1175 |
+
"epoch": 0.05819536182966217,
|
| 1176 |
+
"grad_norm": 10.7798433303833,
|
| 1177 |
+
"learning_rate": 0.0001998215593630448,
|
| 1178 |
+
"loss": 10.5461,
|
| 1179 |
+
"step": 1500
|
| 1180 |
+
},
|
| 1181 |
+
{
|
| 1182 |
+
"epoch": 0.05819536182966217,
|
| 1183 |
+
"eval_loss": 2.462498426437378,
|
| 1184 |
+
"eval_runtime": 51.1983,
|
| 1185 |
+
"eval_samples_per_second": 4.043,
|
| 1186 |
+
"eval_steps_per_second": 2.031,
|
| 1187 |
+
"step": 1500
|
| 1188 |
}
|
| 1189 |
],
|
| 1190 |
"logging_steps": 10,
|
|
|
|
| 1199 |
"early_stopping_threshold": 0.0
|
| 1200 |
},
|
| 1201 |
"attributes": {
|
| 1202 |
+
"early_stopping_patience_counter": 3
|
| 1203 |
}
|
| 1204 |
},
|
| 1205 |
"TrainerControl": {
|
|
|
|
| 1208 |
"should_evaluate": false,
|
| 1209 |
"should_log": false,
|
| 1210 |
"should_save": true,
|
| 1211 |
+
"should_training_stop": true
|
| 1212 |
},
|
| 1213 |
"attributes": {}
|
| 1214 |
}
|
| 1215 |
},
|
| 1216 |
+
"total_flos": 2.2296408461785498e+17,
|
| 1217 |
"train_batch_size": 2,
|
| 1218 |
"trial_name": null,
|
| 1219 |
"trial_params": null
|