Training in progress, step 4071, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 100966336
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:019f7d564a5c9d518bec0e30d9649ba99937af670238d8452968fa5b1413a528
|
| 3 |
size 100966336
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 51613668
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:72e4711ba0b347f94b175a3390cd1f352cd517007e6176a860b09cd59f1edf4e
|
| 3 |
size 51613668
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d0c791d11fe3a154567778d2d9453c793a7de48963a5332d5e4afe615c58f1f4
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5c84513d13cd2d96eb71f4021c545ed7c31d2c720ba1e9098bc883af81a53ca7
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.21696823835372925,
|
| 3 |
"best_model_checkpoint": "miner_id_24/checkpoint-4000",
|
| 4 |
-
"epoch": 2.
|
| 5 |
"eval_steps": 200,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -28175,6 +28175,503 @@
|
|
| 28175 |
"eval_samples_per_second": 31.796,
|
| 28176 |
"eval_steps_per_second": 2.033,
|
| 28177 |
"step": 4000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28178 |
}
|
| 28179 |
],
|
| 28180 |
"logging_steps": 1,
|
|
@@ -28198,12 +28695,12 @@
|
|
| 28198 |
"should_evaluate": false,
|
| 28199 |
"should_log": false,
|
| 28200 |
"should_save": true,
|
| 28201 |
-
"should_training_stop":
|
| 28202 |
},
|
| 28203 |
"attributes": {}
|
| 28204 |
}
|
| 28205 |
},
|
| 28206 |
-
"total_flos": 4.
|
| 28207 |
"train_batch_size": 16,
|
| 28208 |
"trial_name": null,
|
| 28209 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.21696823835372925,
|
| 3 |
"best_model_checkpoint": "miner_id_24/checkpoint-4000",
|
| 4 |
+
"epoch": 2.998895027624309,
|
| 5 |
"eval_steps": 200,
|
| 6 |
+
"global_step": 4071,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 28175 |
"eval_samples_per_second": 31.796,
|
| 28176 |
"eval_steps_per_second": 2.033,
|
| 28177 |
"step": 4000
|
| 28178 |
+
},
|
| 28179 |
+
{
|
| 28180 |
+
"epoch": 2.947329650092081,
|
| 28181 |
+
"grad_norm": 0.3928582966327667,
|
| 28182 |
+
"learning_rate": 1.480406367429299e-07,
|
| 28183 |
+
"loss": 0.1097,
|
| 28184 |
+
"step": 4001
|
| 28185 |
+
},
|
| 28186 |
+
{
|
| 28187 |
+
"epoch": 2.9480662983425416,
|
| 28188 |
+
"grad_norm": 0.4033927619457245,
|
| 28189 |
+
"learning_rate": 1.4384212371320039e-07,
|
| 28190 |
+
"loss": 0.1031,
|
| 28191 |
+
"step": 4002
|
| 28192 |
+
},
|
| 28193 |
+
{
|
| 28194 |
+
"epoch": 2.948802946593002,
|
| 28195 |
+
"grad_norm": 0.31086212396621704,
|
| 28196 |
+
"learning_rate": 1.397039634071895e-07,
|
| 28197 |
+
"loss": 0.0823,
|
| 28198 |
+
"step": 4003
|
| 28199 |
+
},
|
| 28200 |
+
{
|
| 28201 |
+
"epoch": 2.949539594843462,
|
| 28202 |
+
"grad_norm": 0.363800972700119,
|
| 28203 |
+
"learning_rate": 1.3562615832597437e-07,
|
| 28204 |
+
"loss": 0.1162,
|
| 28205 |
+
"step": 4004
|
| 28206 |
+
},
|
| 28207 |
+
{
|
| 28208 |
+
"epoch": 2.9502762430939224,
|
| 28209 |
+
"grad_norm": 0.36636438965797424,
|
| 28210 |
+
"learning_rate": 1.3160871093416128e-07,
|
| 28211 |
+
"loss": 0.1065,
|
| 28212 |
+
"step": 4005
|
| 28213 |
+
},
|
| 28214 |
+
{
|
| 28215 |
+
"epoch": 2.951012891344383,
|
| 28216 |
+
"grad_norm": 0.44840720295906067,
|
| 28217 |
+
"learning_rate": 1.276516236598857e-07,
|
| 28218 |
+
"loss": 0.139,
|
| 28219 |
+
"step": 4006
|
| 28220 |
+
},
|
| 28221 |
+
{
|
| 28222 |
+
"epoch": 2.9517495395948434,
|
| 28223 |
+
"grad_norm": 0.37990802526474,
|
| 28224 |
+
"learning_rate": 1.237548988948123e-07,
|
| 28225 |
+
"loss": 0.1155,
|
| 28226 |
+
"step": 4007
|
| 28227 |
+
},
|
| 28228 |
+
{
|
| 28229 |
+
"epoch": 2.952486187845304,
|
| 28230 |
+
"grad_norm": 0.3310796618461609,
|
| 28231 |
+
"learning_rate": 1.1991853899409044e-07,
|
| 28232 |
+
"loss": 0.1023,
|
| 28233 |
+
"step": 4008
|
| 28234 |
+
},
|
| 28235 |
+
{
|
| 28236 |
+
"epoch": 2.9532228360957644,
|
| 28237 |
+
"grad_norm": 0.525777280330658,
|
| 28238 |
+
"learning_rate": 1.1614254627640986e-07,
|
| 28239 |
+
"loss": 0.1692,
|
| 28240 |
+
"step": 4009
|
| 28241 |
+
},
|
| 28242 |
+
{
|
| 28243 |
+
"epoch": 2.9539594843462247,
|
| 28244 |
+
"grad_norm": 0.3540642559528351,
|
| 28245 |
+
"learning_rate": 1.1242692302395608e-07,
|
| 28246 |
+
"loss": 0.0742,
|
| 28247 |
+
"step": 4010
|
| 28248 |
+
},
|
| 28249 |
+
{
|
| 28250 |
+
"epoch": 2.954696132596685,
|
| 28251 |
+
"grad_norm": 0.4345633089542389,
|
| 28252 |
+
"learning_rate": 1.0877167148246604e-07,
|
| 28253 |
+
"loss": 0.1342,
|
| 28254 |
+
"step": 4011
|
| 28255 |
+
},
|
| 28256 |
+
{
|
| 28257 |
+
"epoch": 2.9554327808471452,
|
| 28258 |
+
"grad_norm": 0.38242772221565247,
|
| 28259 |
+
"learning_rate": 1.0517679386113922e-07,
|
| 28260 |
+
"loss": 0.1004,
|
| 28261 |
+
"step": 4012
|
| 28262 |
+
},
|
| 28263 |
+
{
|
| 28264 |
+
"epoch": 2.956169429097606,
|
| 28265 |
+
"grad_norm": 0.43410253524780273,
|
| 28266 |
+
"learning_rate": 1.0164229233271538e-07,
|
| 28267 |
+
"loss": 0.1084,
|
| 28268 |
+
"step": 4013
|
| 28269 |
+
},
|
| 28270 |
+
{
|
| 28271 |
+
"epoch": 2.956906077348066,
|
| 28272 |
+
"grad_norm": 0.5683735609054565,
|
| 28273 |
+
"learning_rate": 9.816816903343018e-08,
|
| 28274 |
+
"loss": 0.1536,
|
| 28275 |
+
"step": 4014
|
| 28276 |
+
},
|
| 28277 |
+
{
|
| 28278 |
+
"epoch": 2.957642725598527,
|
| 28279 |
+
"grad_norm": 0.3811461627483368,
|
| 28280 |
+
"learning_rate": 9.47544260630373e-08,
|
| 28281 |
+
"loss": 0.0991,
|
| 28282 |
+
"step": 4015
|
| 28283 |
+
},
|
| 28284 |
+
{
|
| 28285 |
+
"epoch": 2.958379373848987,
|
| 28286 |
+
"grad_norm": 0.45557355880737305,
|
| 28287 |
+
"learning_rate": 9.140106548478633e-08,
|
| 28288 |
+
"loss": 0.1312,
|
| 28289 |
+
"step": 4016
|
| 28290 |
+
},
|
| 28291 |
+
{
|
| 28292 |
+
"epoch": 2.9591160220994475,
|
| 28293 |
+
"grad_norm": 0.3726716935634613,
|
| 28294 |
+
"learning_rate": 8.81080893254449e-08,
|
| 28295 |
+
"loss": 0.1225,
|
| 28296 |
+
"step": 4017
|
| 28297 |
+
},
|
| 28298 |
+
{
|
| 28299 |
+
"epoch": 2.9598526703499077,
|
| 28300 |
+
"grad_norm": 0.44345927238464355,
|
| 28301 |
+
"learning_rate": 8.487549957526541e-08,
|
| 28302 |
+
"loss": 0.1332,
|
| 28303 |
+
"step": 4018
|
| 28304 |
+
},
|
| 28305 |
+
{
|
| 28306 |
+
"epoch": 2.9605893186003684,
|
| 28307 |
+
"grad_norm": 0.41493895649909973,
|
| 28308 |
+
"learning_rate": 8.170329818802947e-08,
|
| 28309 |
+
"loss": 0.1248,
|
| 28310 |
+
"step": 4019
|
| 28311 |
+
},
|
| 28312 |
+
{
|
| 28313 |
+
"epoch": 2.9613259668508287,
|
| 28314 |
+
"grad_norm": 0.4436560869216919,
|
| 28315 |
+
"learning_rate": 7.859148708099228e-08,
|
| 28316 |
+
"loss": 0.1317,
|
| 28317 |
+
"step": 4020
|
| 28318 |
+
},
|
| 28319 |
+
{
|
| 28320 |
+
"epoch": 2.962062615101289,
|
| 28321 |
+
"grad_norm": 0.3871510624885559,
|
| 28322 |
+
"learning_rate": 7.554006813491609e-08,
|
| 28323 |
+
"loss": 0.1029,
|
| 28324 |
+
"step": 4021
|
| 28325 |
+
},
|
| 28326 |
+
{
|
| 28327 |
+
"epoch": 2.9627992633517497,
|
| 28328 |
+
"grad_norm": 0.42555782198905945,
|
| 28329 |
+
"learning_rate": 7.254904319409229e-08,
|
| 28330 |
+
"loss": 0.1519,
|
| 28331 |
+
"step": 4022
|
| 28332 |
+
},
|
| 28333 |
+
{
|
| 28334 |
+
"epoch": 2.96353591160221,
|
| 28335 |
+
"grad_norm": 0.4393730163574219,
|
| 28336 |
+
"learning_rate": 6.961841406626368e-08,
|
| 28337 |
+
"loss": 0.0969,
|
| 28338 |
+
"step": 4023
|
| 28339 |
+
},
|
| 28340 |
+
{
|
| 28341 |
+
"epoch": 2.9642725598526702,
|
| 28342 |
+
"grad_norm": 0.43214312195777893,
|
| 28343 |
+
"learning_rate": 6.674818252270232e-08,
|
| 28344 |
+
"loss": 0.1308,
|
| 28345 |
+
"step": 4024
|
| 28346 |
+
},
|
| 28347 |
+
{
|
| 28348 |
+
"epoch": 2.9650092081031305,
|
| 28349 |
+
"grad_norm": 0.3302169442176819,
|
| 28350 |
+
"learning_rate": 6.393835029816497e-08,
|
| 28351 |
+
"loss": 0.1412,
|
| 28352 |
+
"step": 4025
|
| 28353 |
+
},
|
| 28354 |
+
{
|
| 28355 |
+
"epoch": 2.9657458563535912,
|
| 28356 |
+
"grad_norm": 0.42520150542259216,
|
| 28357 |
+
"learning_rate": 6.11889190909043e-08,
|
| 28358 |
+
"loss": 0.1354,
|
| 28359 |
+
"step": 4026
|
| 28360 |
+
},
|
| 28361 |
+
{
|
| 28362 |
+
"epoch": 2.9664825046040515,
|
| 28363 |
+
"grad_norm": 0.4056524932384491,
|
| 28364 |
+
"learning_rate": 5.84998905626799e-08,
|
| 28365 |
+
"loss": 0.1074,
|
| 28366 |
+
"step": 4027
|
| 28367 |
+
},
|
| 28368 |
+
{
|
| 28369 |
+
"epoch": 2.967219152854512,
|
| 28370 |
+
"grad_norm": 0.40226319432258606,
|
| 28371 |
+
"learning_rate": 5.5871266338702876e-08,
|
| 28372 |
+
"loss": 0.1444,
|
| 28373 |
+
"step": 4028
|
| 28374 |
+
},
|
| 28375 |
+
{
|
| 28376 |
+
"epoch": 2.9679558011049725,
|
| 28377 |
+
"grad_norm": 0.370077520608902,
|
| 28378 |
+
"learning_rate": 5.330304800772456e-08,
|
| 28379 |
+
"loss": 0.1096,
|
| 28380 |
+
"step": 4029
|
| 28381 |
+
},
|
| 28382 |
+
{
|
| 28383 |
+
"epoch": 2.9686924493554327,
|
| 28384 |
+
"grad_norm": 0.4626006782054901,
|
| 28385 |
+
"learning_rate": 5.0795237121969984e-08,
|
| 28386 |
+
"loss": 0.145,
|
| 28387 |
+
"step": 4030
|
| 28388 |
+
},
|
| 28389 |
+
{
|
| 28390 |
+
"epoch": 2.969429097605893,
|
| 28391 |
+
"grad_norm": 0.45213082432746887,
|
| 28392 |
+
"learning_rate": 4.8347835197137814e-08,
|
| 28393 |
+
"loss": 0.1208,
|
| 28394 |
+
"step": 4031
|
| 28395 |
+
},
|
| 28396 |
+
{
|
| 28397 |
+
"epoch": 2.9701657458563537,
|
| 28398 |
+
"grad_norm": 0.43094053864479065,
|
| 28399 |
+
"learning_rate": 4.59608437124337e-08,
|
| 28400 |
+
"loss": 0.1237,
|
| 28401 |
+
"step": 4032
|
| 28402 |
+
},
|
| 28403 |
+
{
|
| 28404 |
+
"epoch": 2.970902394106814,
|
| 28405 |
+
"grad_norm": 0.42492741346359253,
|
| 28406 |
+
"learning_rate": 4.363426411055915e-08,
|
| 28407 |
+
"loss": 0.1358,
|
| 28408 |
+
"step": 4033
|
| 28409 |
+
},
|
| 28410 |
+
{
|
| 28411 |
+
"epoch": 2.9716390423572743,
|
| 28412 |
+
"grad_norm": 0.4052026569843292,
|
| 28413 |
+
"learning_rate": 4.1368097797678255e-08,
|
| 28414 |
+
"loss": 0.1083,
|
| 28415 |
+
"step": 4034
|
| 28416 |
+
},
|
| 28417 |
+
{
|
| 28418 |
+
"epoch": 2.972375690607735,
|
| 28419 |
+
"grad_norm": 0.42593371868133545,
|
| 28420 |
+
"learning_rate": 3.916234614346204e-08,
|
| 28421 |
+
"loss": 0.1469,
|
| 28422 |
+
"step": 4035
|
| 28423 |
+
},
|
| 28424 |
+
{
|
| 28425 |
+
"epoch": 2.9731123388581953,
|
| 28426 |
+
"grad_norm": 0.40533506870269775,
|
| 28427 |
+
"learning_rate": 3.701701048105521e-08,
|
| 28428 |
+
"loss": 0.1215,
|
| 28429 |
+
"step": 4036
|
| 28430 |
+
},
|
| 28431 |
+
{
|
| 28432 |
+
"epoch": 2.9738489871086555,
|
| 28433 |
+
"grad_norm": 0.5333660244941711,
|
| 28434 |
+
"learning_rate": 3.493209210708725e-08,
|
| 28435 |
+
"loss": 0.1454,
|
| 28436 |
+
"step": 4037
|
| 28437 |
+
},
|
| 28438 |
+
{
|
| 28439 |
+
"epoch": 2.974585635359116,
|
| 28440 |
+
"grad_norm": 0.4893753230571747,
|
| 28441 |
+
"learning_rate": 3.29075922816946e-08,
|
| 28442 |
+
"loss": 0.1435,
|
| 28443 |
+
"step": 4038
|
| 28444 |
+
},
|
| 28445 |
+
{
|
| 28446 |
+
"epoch": 2.9753222836095765,
|
| 28447 |
+
"grad_norm": 0.39299649000167847,
|
| 28448 |
+
"learning_rate": 3.094351222844294e-08,
|
| 28449 |
+
"loss": 0.1066,
|
| 28450 |
+
"step": 4039
|
| 28451 |
+
},
|
| 28452 |
+
{
|
| 28453 |
+
"epoch": 2.976058931860037,
|
| 28454 |
+
"grad_norm": 0.4104492962360382,
|
| 28455 |
+
"learning_rate": 2.9039853134449348e-08,
|
| 28456 |
+
"loss": 0.0963,
|
| 28457 |
+
"step": 4040
|
| 28458 |
+
},
|
| 28459 |
+
{
|
| 28460 |
+
"epoch": 2.9767955801104975,
|
| 28461 |
+
"grad_norm": 0.4806900918483734,
|
| 28462 |
+
"learning_rate": 2.7196616150271247e-08,
|
| 28463 |
+
"loss": 0.1255,
|
| 28464 |
+
"step": 4041
|
| 28465 |
+
},
|
| 28466 |
+
{
|
| 28467 |
+
"epoch": 2.9775322283609578,
|
| 28468 |
+
"grad_norm": 0.4183269143104553,
|
| 28469 |
+
"learning_rate": 2.5413802389939735e-08,
|
| 28470 |
+
"loss": 0.1456,
|
| 28471 |
+
"step": 4042
|
| 28472 |
+
},
|
| 28473 |
+
{
|
| 28474 |
+
"epoch": 2.978268876611418,
|
| 28475 |
+
"grad_norm": 0.4006012976169586,
|
| 28476 |
+
"learning_rate": 2.3691412930992864e-08,
|
| 28477 |
+
"loss": 0.1109,
|
| 28478 |
+
"step": 4043
|
| 28479 |
+
},
|
| 28480 |
+
{
|
| 28481 |
+
"epoch": 2.9790055248618783,
|
| 28482 |
+
"grad_norm": 0.4611269235610962,
|
| 28483 |
+
"learning_rate": 2.2029448814431253e-08,
|
| 28484 |
+
"loss": 0.1349,
|
| 28485 |
+
"step": 4044
|
| 28486 |
+
},
|
| 28487 |
+
{
|
| 28488 |
+
"epoch": 2.979742173112339,
|
| 28489 |
+
"grad_norm": 0.4572463631629944,
|
| 28490 |
+
"learning_rate": 2.0427911044751392e-08,
|
| 28491 |
+
"loss": 0.1125,
|
| 28492 |
+
"step": 4045
|
| 28493 |
+
},
|
| 28494 |
+
{
|
| 28495 |
+
"epoch": 2.9804788213627993,
|
| 28496 |
+
"grad_norm": 0.3652282953262329,
|
| 28497 |
+
"learning_rate": 1.8886800589912322e-08,
|
| 28498 |
+
"loss": 0.0912,
|
| 28499 |
+
"step": 4046
|
| 28500 |
+
},
|
| 28501 |
+
{
|
| 28502 |
+
"epoch": 2.9812154696132596,
|
| 28503 |
+
"grad_norm": 0.4529966413974762,
|
| 28504 |
+
"learning_rate": 1.7406118381346758e-08,
|
| 28505 |
+
"loss": 0.0887,
|
| 28506 |
+
"step": 4047
|
| 28507 |
+
},
|
| 28508 |
+
{
|
| 28509 |
+
"epoch": 2.9819521178637203,
|
| 28510 |
+
"grad_norm": 0.41920584440231323,
|
| 28511 |
+
"learning_rate": 1.5985865313994374e-08,
|
| 28512 |
+
"loss": 0.1024,
|
| 28513 |
+
"step": 4048
|
| 28514 |
+
},
|
| 28515 |
+
{
|
| 28516 |
+
"epoch": 2.9826887661141805,
|
| 28517 |
+
"grad_norm": 0.38177281618118286,
|
| 28518 |
+
"learning_rate": 1.4626042246224103e-08,
|
| 28519 |
+
"loss": 0.1005,
|
| 28520 |
+
"step": 4049
|
| 28521 |
+
},
|
| 28522 |
+
{
|
| 28523 |
+
"epoch": 2.983425414364641,
|
| 28524 |
+
"grad_norm": 0.46106603741645813,
|
| 28525 |
+
"learning_rate": 1.3326649999934049e-08,
|
| 28526 |
+
"loss": 0.1222,
|
| 28527 |
+
"step": 4050
|
| 28528 |
+
},
|
| 28529 |
+
{
|
| 28530 |
+
"epoch": 2.984162062615101,
|
| 28531 |
+
"grad_norm": 0.44589948654174805,
|
| 28532 |
+
"learning_rate": 1.208768936045157e-08,
|
| 28533 |
+
"loss": 0.1088,
|
| 28534 |
+
"step": 4051
|
| 28535 |
+
},
|
| 28536 |
+
{
|
| 28537 |
+
"epoch": 2.984898710865562,
|
| 28538 |
+
"grad_norm": 0.4326612651348114,
|
| 28539 |
+
"learning_rate": 1.09091610766221e-08,
|
| 28540 |
+
"loss": 0.1544,
|
| 28541 |
+
"step": 4052
|
| 28542 |
+
},
|
| 28543 |
+
{
|
| 28544 |
+
"epoch": 2.985635359116022,
|
| 28545 |
+
"grad_norm": 0.40109142661094666,
|
| 28546 |
+
"learning_rate": 9.791065860720317e-09,
|
| 28547 |
+
"loss": 0.1251,
|
| 28548 |
+
"step": 4053
|
| 28549 |
+
},
|
| 28550 |
+
{
|
| 28551 |
+
"epoch": 2.9863720073664823,
|
| 28552 |
+
"grad_norm": 0.418179452419281,
|
| 28553 |
+
"learning_rate": 8.733404388538979e-09,
|
| 28554 |
+
"loss": 0.091,
|
| 28555 |
+
"step": 4054
|
| 28556 |
+
},
|
| 28557 |
+
{
|
| 28558 |
+
"epoch": 2.987108655616943,
|
| 28559 |
+
"grad_norm": 0.41939765214920044,
|
| 28560 |
+
"learning_rate": 7.736177299311198e-09,
|
| 28561 |
+
"loss": 0.1116,
|
| 28562 |
+
"step": 4055
|
| 28563 |
+
},
|
| 28564 |
+
{
|
| 28565 |
+
"epoch": 2.9878453038674033,
|
| 28566 |
+
"grad_norm": 0.4822397232055664,
|
| 28567 |
+
"learning_rate": 6.79938519576595e-09,
|
| 28568 |
+
"loss": 0.1445,
|
| 28569 |
+
"step": 4056
|
| 28570 |
+
},
|
| 28571 |
+
{
|
| 28572 |
+
"epoch": 2.9885819521178636,
|
| 28573 |
+
"grad_norm": 0.33148708939552307,
|
| 28574 |
+
"learning_rate": 5.9230286440947745e-09,
|
| 28575 |
+
"loss": 0.0995,
|
| 28576 |
+
"step": 4057
|
| 28577 |
+
},
|
| 28578 |
+
{
|
| 28579 |
+
"epoch": 2.989318600368324,
|
| 28580 |
+
"grad_norm": 0.394879549741745,
|
| 28581 |
+
"learning_rate": 5.107108173962871e-09,
|
| 28582 |
+
"loss": 0.0885,
|
| 28583 |
+
"step": 4058
|
| 28584 |
+
},
|
| 28585 |
+
{
|
| 28586 |
+
"epoch": 2.9900552486187846,
|
| 28587 |
+
"grad_norm": 0.535071849822998,
|
| 28588 |
+
"learning_rate": 4.351624278509103e-09,
|
| 28589 |
+
"loss": 0.1855,
|
| 28590 |
+
"step": 4059
|
| 28591 |
+
},
|
| 28592 |
+
{
|
| 28593 |
+
"epoch": 2.990791896869245,
|
| 28594 |
+
"grad_norm": 0.39724433422088623,
|
| 28595 |
+
"learning_rate": 3.6565774143459962e-09,
|
| 28596 |
+
"loss": 0.1311,
|
| 28597 |
+
"step": 4060
|
| 28598 |
+
},
|
| 28599 |
+
{
|
| 28600 |
+
"epoch": 2.9915285451197056,
|
| 28601 |
+
"grad_norm": 0.4425889551639557,
|
| 28602 |
+
"learning_rate": 3.0219680015708406e-09,
|
| 28603 |
+
"loss": 0.1081,
|
| 28604 |
+
"step": 4061
|
| 28605 |
+
},
|
| 28606 |
+
{
|
| 28607 |
+
"epoch": 2.992265193370166,
|
| 28608 |
+
"grad_norm": 0.35188987851142883,
|
| 28609 |
+
"learning_rate": 2.4477964237212824e-09,
|
| 28610 |
+
"loss": 0.1005,
|
| 28611 |
+
"step": 4062
|
| 28612 |
+
},
|
| 28613 |
+
{
|
| 28614 |
+
"epoch": 2.993001841620626,
|
| 28615 |
+
"grad_norm": 0.42041268944740295,
|
| 28616 |
+
"learning_rate": 1.9340630278308345e-09,
|
| 28617 |
+
"loss": 0.1107,
|
| 28618 |
+
"step": 4063
|
| 28619 |
+
},
|
| 28620 |
+
{
|
| 28621 |
+
"epoch": 2.9937384898710864,
|
| 28622 |
+
"grad_norm": 0.43599700927734375,
|
| 28623 |
+
"learning_rate": 1.4807681243955707e-09,
|
| 28624 |
+
"loss": 0.102,
|
| 28625 |
+
"step": 4064
|
| 28626 |
+
},
|
| 28627 |
+
{
|
| 28628 |
+
"epoch": 2.994475138121547,
|
| 28629 |
+
"grad_norm": 0.4020419716835022,
|
| 28630 |
+
"learning_rate": 1.0879119873852262e-09,
|
| 28631 |
+
"loss": 0.1328,
|
| 28632 |
+
"step": 4065
|
| 28633 |
+
},
|
| 28634 |
+
{
|
| 28635 |
+
"epoch": 2.9952117863720074,
|
| 28636 |
+
"grad_norm": 0.5001260042190552,
|
| 28637 |
+
"learning_rate": 7.554948542543017e-10,
|
| 28638 |
+
"loss": 0.1634,
|
| 28639 |
+
"step": 4066
|
| 28640 |
+
},
|
| 28641 |
+
{
|
| 28642 |
+
"epoch": 2.9959484346224676,
|
| 28643 |
+
"grad_norm": 0.48046594858169556,
|
| 28644 |
+
"learning_rate": 4.835169258976536e-10,
|
| 28645 |
+
"loss": 0.1276,
|
| 28646 |
+
"step": 4067
|
| 28647 |
+
},
|
| 28648 |
+
{
|
| 28649 |
+
"epoch": 2.9966850828729283,
|
| 28650 |
+
"grad_norm": 0.3800838589668274,
|
| 28651 |
+
"learning_rate": 2.71978366706005e-10,
|
| 28652 |
+
"loss": 0.0995,
|
| 28653 |
+
"step": 4068
|
| 28654 |
+
},
|
| 28655 |
+
{
|
| 28656 |
+
"epoch": 2.9974217311233886,
|
| 28657 |
+
"grad_norm": 0.38035425543785095,
|
| 28658 |
+
"learning_rate": 1.2087930453263952e-10,
|
| 28659 |
+
"loss": 0.1164,
|
| 28660 |
+
"step": 4069
|
| 28661 |
+
},
|
| 28662 |
+
{
|
| 28663 |
+
"epoch": 2.998158379373849,
|
| 28664 |
+
"grad_norm": 0.48082515597343445,
|
| 28665 |
+
"learning_rate": 3.021983070450318e-11,
|
| 28666 |
+
"loss": 0.1213,
|
| 28667 |
+
"step": 4070
|
| 28668 |
+
},
|
| 28669 |
+
{
|
| 28670 |
+
"epoch": 2.998895027624309,
|
| 28671 |
+
"grad_norm": 0.3878474831581116,
|
| 28672 |
+
"learning_rate": 0.0,
|
| 28673 |
+
"loss": 0.103,
|
| 28674 |
+
"step": 4071
|
| 28675 |
}
|
| 28676 |
],
|
| 28677 |
"logging_steps": 1,
|
|
|
|
| 28695 |
"should_evaluate": false,
|
| 28696 |
"should_log": false,
|
| 28697 |
"should_save": true,
|
| 28698 |
+
"should_training_stop": true
|
| 28699 |
},
|
| 28700 |
"attributes": {}
|
| 28701 |
}
|
| 28702 |
},
|
| 28703 |
+
"total_flos": 4.089665390861353e+17,
|
| 28704 |
"train_batch_size": 16,
|
| 28705 |
"trial_name": null,
|
| 28706 |
"trial_params": null
|