Training in progress, step 56000, checkpoint
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +353 -3
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 304481530
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5fe736c4aa2a01e7bdd450b3f5ad17d22bd6d998c21f3be88229c094c87c7e31
|
| 3 |
size 304481530
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 402029570
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f13c5595cffc9acc4fba913e67571bbfa169120e968c56adede64d35dc4a9983
|
| 3 |
size 402029570
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eefd6eabe10776e158c26b037c833f0a538e87ecc5b41f3ec5b83db2ee085222
|
| 3 |
size 14960
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:61b622224d0429fa788320c8e5bf7b4fa226b91d5779b03ff807c7a77c5801ff
|
| 3 |
size 14960
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:13eaed908712a1c285ee1e1812b438bbabf64c8443377b65c97ba88f1f1659c5
|
| 3 |
size 14960
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:511643e6249f4ea9212a1dfdf8dd72a9148b63815d4ef9de03948ab4598161f3
|
| 3 |
size 14960
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0989f631c4201212ca348622ae2d095f9b6b69c39f42732c5c97cef21592c5a6
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -19258,6 +19258,356 @@
|
|
| 19258 |
"learning_rate": 0.0004865416490511318,
|
| 19259 |
"loss": 17.1327,
|
| 19260 |
"step": 55000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19261 |
}
|
| 19262 |
],
|
| 19263 |
"logging_steps": 20,
|
|
@@ -19277,7 +19627,7 @@
|
|
| 19277 |
"attributes": {}
|
| 19278 |
}
|
| 19279 |
},
|
| 19280 |
-
"total_flos": 4.
|
| 19281 |
"train_batch_size": 48,
|
| 19282 |
"trial_name": null,
|
| 19283 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.0829536229994845,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 56000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 19258 |
"learning_rate": 0.0004865416490511318,
|
| 19259 |
"loss": 17.1327,
|
| 19260 |
"step": 55000
|
| 19261 |
+
},
|
| 19262 |
+
{
|
| 19263 |
+
"epoch": 0.08150193459699352,
|
| 19264 |
+
"grad_norm": 6.9375,
|
| 19265 |
+
"learning_rate": 0.00048653671011612425,
|
| 19266 |
+
"loss": 17.1478,
|
| 19267 |
+
"step": 55020
|
| 19268 |
+
},
|
| 19269 |
+
{
|
| 19270 |
+
"epoch": 0.08153156089092191,
|
| 19271 |
+
"grad_norm": 7.28125,
|
| 19272 |
+
"learning_rate": 0.0004865317711811167,
|
| 19273 |
+
"loss": 17.1158,
|
| 19274 |
+
"step": 55040
|
| 19275 |
+
},
|
| 19276 |
+
{
|
| 19277 |
+
"epoch": 0.0815611871848503,
|
| 19278 |
+
"grad_norm": 6.96875,
|
| 19279 |
+
"learning_rate": 0.0004865268322461091,
|
| 19280 |
+
"loss": 17.1082,
|
| 19281 |
+
"step": 55060
|
| 19282 |
+
},
|
| 19283 |
+
{
|
| 19284 |
+
"epoch": 0.08159081347877868,
|
| 19285 |
+
"grad_norm": 7.34375,
|
| 19286 |
+
"learning_rate": 0.00048652189331110154,
|
| 19287 |
+
"loss": 17.1178,
|
| 19288 |
+
"step": 55080
|
| 19289 |
+
},
|
| 19290 |
+
{
|
| 19291 |
+
"epoch": 0.08162043977270707,
|
| 19292 |
+
"grad_norm": 7.125,
|
| 19293 |
+
"learning_rate": 0.000486516954376094,
|
| 19294 |
+
"loss": 17.0626,
|
| 19295 |
+
"step": 55100
|
| 19296 |
+
},
|
| 19297 |
+
{
|
| 19298 |
+
"epoch": 0.08165006606663545,
|
| 19299 |
+
"grad_norm": 7.40625,
|
| 19300 |
+
"learning_rate": 0.00048651201544108643,
|
| 19301 |
+
"loss": 17.1654,
|
| 19302 |
+
"step": 55120
|
| 19303 |
+
},
|
| 19304 |
+
{
|
| 19305 |
+
"epoch": 0.08167969236056384,
|
| 19306 |
+
"grad_norm": 6.375,
|
| 19307 |
+
"learning_rate": 0.0004865070765060788,
|
| 19308 |
+
"loss": 17.1095,
|
| 19309 |
+
"step": 55140
|
| 19310 |
+
},
|
| 19311 |
+
{
|
| 19312 |
+
"epoch": 0.08170931865449224,
|
| 19313 |
+
"grad_norm": 7.21875,
|
| 19314 |
+
"learning_rate": 0.0004865021375710713,
|
| 19315 |
+
"loss": 17.1073,
|
| 19316 |
+
"step": 55160
|
| 19317 |
+
},
|
| 19318 |
+
{
|
| 19319 |
+
"epoch": 0.08173894494842063,
|
| 19320 |
+
"grad_norm": 7.15625,
|
| 19321 |
+
"learning_rate": 0.0004864971986360637,
|
| 19322 |
+
"loss": 17.1573,
|
| 19323 |
+
"step": 55180
|
| 19324 |
+
},
|
| 19325 |
+
{
|
| 19326 |
+
"epoch": 0.08176857124234901,
|
| 19327 |
+
"grad_norm": 7.15625,
|
| 19328 |
+
"learning_rate": 0.00048649225970105617,
|
| 19329 |
+
"loss": 17.1434,
|
| 19330 |
+
"step": 55200
|
| 19331 |
+
},
|
| 19332 |
+
{
|
| 19333 |
+
"epoch": 0.0817981975362774,
|
| 19334 |
+
"grad_norm": 6.59375,
|
| 19335 |
+
"learning_rate": 0.00048648732076604856,
|
| 19336 |
+
"loss": 17.0566,
|
| 19337 |
+
"step": 55220
|
| 19338 |
+
},
|
| 19339 |
+
{
|
| 19340 |
+
"epoch": 0.08182782383020579,
|
| 19341 |
+
"grad_norm": 7.71875,
|
| 19342 |
+
"learning_rate": 0.00048648238183104106,
|
| 19343 |
+
"loss": 17.1302,
|
| 19344 |
+
"step": 55240
|
| 19345 |
+
},
|
| 19346 |
+
{
|
| 19347 |
+
"epoch": 0.08185745012413417,
|
| 19348 |
+
"grad_norm": 6.9375,
|
| 19349 |
+
"learning_rate": 0.00048647744289603346,
|
| 19350 |
+
"loss": 17.1324,
|
| 19351 |
+
"step": 55260
|
| 19352 |
+
},
|
| 19353 |
+
{
|
| 19354 |
+
"epoch": 0.08188707641806256,
|
| 19355 |
+
"grad_norm": 12.125,
|
| 19356 |
+
"learning_rate": 0.0004864725039610259,
|
| 19357 |
+
"loss": 17.0792,
|
| 19358 |
+
"step": 55280
|
| 19359 |
+
},
|
| 19360 |
+
{
|
| 19361 |
+
"epoch": 0.08191670271199095,
|
| 19362 |
+
"grad_norm": 7.90625,
|
| 19363 |
+
"learning_rate": 0.0004864675650260183,
|
| 19364 |
+
"loss": 17.1382,
|
| 19365 |
+
"step": 55300
|
| 19366 |
+
},
|
| 19367 |
+
{
|
| 19368 |
+
"epoch": 0.08194632900591933,
|
| 19369 |
+
"grad_norm": 6.78125,
|
| 19370 |
+
"learning_rate": 0.00048646262609101075,
|
| 19371 |
+
"loss": 17.1347,
|
| 19372 |
+
"step": 55320
|
| 19373 |
+
},
|
| 19374 |
+
{
|
| 19375 |
+
"epoch": 0.08197595529984772,
|
| 19376 |
+
"grad_norm": 7.25,
|
| 19377 |
+
"learning_rate": 0.0004864576871560032,
|
| 19378 |
+
"loss": 17.0667,
|
| 19379 |
+
"step": 55340
|
| 19380 |
+
},
|
| 19381 |
+
{
|
| 19382 |
+
"epoch": 0.0820055815937761,
|
| 19383 |
+
"grad_norm": 7.0,
|
| 19384 |
+
"learning_rate": 0.0004864527482209956,
|
| 19385 |
+
"loss": 17.0842,
|
| 19386 |
+
"step": 55360
|
| 19387 |
+
},
|
| 19388 |
+
{
|
| 19389 |
+
"epoch": 0.08203520788770449,
|
| 19390 |
+
"grad_norm": 7.71875,
|
| 19391 |
+
"learning_rate": 0.00048644780928598804,
|
| 19392 |
+
"loss": 17.1114,
|
| 19393 |
+
"step": 55380
|
| 19394 |
+
},
|
| 19395 |
+
{
|
| 19396 |
+
"epoch": 0.08206483418163288,
|
| 19397 |
+
"grad_norm": 7.34375,
|
| 19398 |
+
"learning_rate": 0.0004864428703509805,
|
| 19399 |
+
"loss": 17.089,
|
| 19400 |
+
"step": 55400
|
| 19401 |
+
},
|
| 19402 |
+
{
|
| 19403 |
+
"epoch": 0.08209446047556127,
|
| 19404 |
+
"grad_norm": 7.875,
|
| 19405 |
+
"learning_rate": 0.00048643793141597293,
|
| 19406 |
+
"loss": 17.1292,
|
| 19407 |
+
"step": 55420
|
| 19408 |
+
},
|
| 19409 |
+
{
|
| 19410 |
+
"epoch": 0.08212408676948965,
|
| 19411 |
+
"grad_norm": 6.96875,
|
| 19412 |
+
"learning_rate": 0.0004864329924809653,
|
| 19413 |
+
"loss": 17.1258,
|
| 19414 |
+
"step": 55440
|
| 19415 |
+
},
|
| 19416 |
+
{
|
| 19417 |
+
"epoch": 0.08215371306341804,
|
| 19418 |
+
"grad_norm": 6.28125,
|
| 19419 |
+
"learning_rate": 0.0004864280535459578,
|
| 19420 |
+
"loss": 17.1261,
|
| 19421 |
+
"step": 55460
|
| 19422 |
+
},
|
| 19423 |
+
{
|
| 19424 |
+
"epoch": 0.08218333935734644,
|
| 19425 |
+
"grad_norm": 6.6875,
|
| 19426 |
+
"learning_rate": 0.0004864231146109502,
|
| 19427 |
+
"loss": 17.0684,
|
| 19428 |
+
"step": 55480
|
| 19429 |
+
},
|
| 19430 |
+
{
|
| 19431 |
+
"epoch": 0.08221296565127482,
|
| 19432 |
+
"grad_norm": 7.625,
|
| 19433 |
+
"learning_rate": 0.00048641817567594267,
|
| 19434 |
+
"loss": 17.0862,
|
| 19435 |
+
"step": 55500
|
| 19436 |
+
},
|
| 19437 |
+
{
|
| 19438 |
+
"epoch": 0.08224259194520321,
|
| 19439 |
+
"grad_norm": 6.53125,
|
| 19440 |
+
"learning_rate": 0.00048641323674093506,
|
| 19441 |
+
"loss": 17.1307,
|
| 19442 |
+
"step": 55520
|
| 19443 |
+
},
|
| 19444 |
+
{
|
| 19445 |
+
"epoch": 0.0822722182391316,
|
| 19446 |
+
"grad_norm": 6.46875,
|
| 19447 |
+
"learning_rate": 0.00048640829780592756,
|
| 19448 |
+
"loss": 17.0831,
|
| 19449 |
+
"step": 55540
|
| 19450 |
+
},
|
| 19451 |
+
{
|
| 19452 |
+
"epoch": 0.08230184453305998,
|
| 19453 |
+
"grad_norm": 7.0,
|
| 19454 |
+
"learning_rate": 0.00048640335887091996,
|
| 19455 |
+
"loss": 17.0549,
|
| 19456 |
+
"step": 55560
|
| 19457 |
+
},
|
| 19458 |
+
{
|
| 19459 |
+
"epoch": 0.08233147082698837,
|
| 19460 |
+
"grad_norm": 6.625,
|
| 19461 |
+
"learning_rate": 0.0004863984199359124,
|
| 19462 |
+
"loss": 17.1213,
|
| 19463 |
+
"step": 55580
|
| 19464 |
+
},
|
| 19465 |
+
{
|
| 19466 |
+
"epoch": 0.08236109712091676,
|
| 19467 |
+
"grad_norm": 7.09375,
|
| 19468 |
+
"learning_rate": 0.0004863934810009048,
|
| 19469 |
+
"loss": 17.1498,
|
| 19470 |
+
"step": 55600
|
| 19471 |
+
},
|
| 19472 |
+
{
|
| 19473 |
+
"epoch": 0.08239072341484514,
|
| 19474 |
+
"grad_norm": 6.25,
|
| 19475 |
+
"learning_rate": 0.0004863885420658973,
|
| 19476 |
+
"loss": 17.0782,
|
| 19477 |
+
"step": 55620
|
| 19478 |
+
},
|
| 19479 |
+
{
|
| 19480 |
+
"epoch": 0.08242034970877353,
|
| 19481 |
+
"grad_norm": 6.4375,
|
| 19482 |
+
"learning_rate": 0.0004863836031308897,
|
| 19483 |
+
"loss": 17.1037,
|
| 19484 |
+
"step": 55640
|
| 19485 |
+
},
|
| 19486 |
+
{
|
| 19487 |
+
"epoch": 0.08244997600270192,
|
| 19488 |
+
"grad_norm": 7.15625,
|
| 19489 |
+
"learning_rate": 0.0004863786641958821,
|
| 19490 |
+
"loss": 17.1475,
|
| 19491 |
+
"step": 55660
|
| 19492 |
+
},
|
| 19493 |
+
{
|
| 19494 |
+
"epoch": 0.0824796022966303,
|
| 19495 |
+
"grad_norm": 6.90625,
|
| 19496 |
+
"learning_rate": 0.00048637372526087454,
|
| 19497 |
+
"loss": 17.1295,
|
| 19498 |
+
"step": 55680
|
| 19499 |
+
},
|
| 19500 |
+
{
|
| 19501 |
+
"epoch": 0.08250922859055869,
|
| 19502 |
+
"grad_norm": 6.21875,
|
| 19503 |
+
"learning_rate": 0.000486368786325867,
|
| 19504 |
+
"loss": 17.0528,
|
| 19505 |
+
"step": 55700
|
| 19506 |
+
},
|
| 19507 |
+
{
|
| 19508 |
+
"epoch": 0.08253885488448708,
|
| 19509 |
+
"grad_norm": 6.21875,
|
| 19510 |
+
"learning_rate": 0.00048636384739085943,
|
| 19511 |
+
"loss": 17.0928,
|
| 19512 |
+
"step": 55720
|
| 19513 |
+
},
|
| 19514 |
+
{
|
| 19515 |
+
"epoch": 0.08256848117841546,
|
| 19516 |
+
"grad_norm": 7.09375,
|
| 19517 |
+
"learning_rate": 0.0004863589084558518,
|
| 19518 |
+
"loss": 17.0391,
|
| 19519 |
+
"step": 55740
|
| 19520 |
+
},
|
| 19521 |
+
{
|
| 19522 |
+
"epoch": 0.08259810747234385,
|
| 19523 |
+
"grad_norm": 7.21875,
|
| 19524 |
+
"learning_rate": 0.0004863539695208443,
|
| 19525 |
+
"loss": 17.1124,
|
| 19526 |
+
"step": 55760
|
| 19527 |
+
},
|
| 19528 |
+
{
|
| 19529 |
+
"epoch": 0.08262773376627225,
|
| 19530 |
+
"grad_norm": 6.65625,
|
| 19531 |
+
"learning_rate": 0.0004863490305858367,
|
| 19532 |
+
"loss": 17.1316,
|
| 19533 |
+
"step": 55780
|
| 19534 |
+
},
|
| 19535 |
+
{
|
| 19536 |
+
"epoch": 0.08265736006020064,
|
| 19537 |
+
"grad_norm": 6.8125,
|
| 19538 |
+
"learning_rate": 0.00048634409165082917,
|
| 19539 |
+
"loss": 17.081,
|
| 19540 |
+
"step": 55800
|
| 19541 |
+
},
|
| 19542 |
+
{
|
| 19543 |
+
"epoch": 0.08268698635412902,
|
| 19544 |
+
"grad_norm": 7.625,
|
| 19545 |
+
"learning_rate": 0.00048633915271582156,
|
| 19546 |
+
"loss": 17.064,
|
| 19547 |
+
"step": 55820
|
| 19548 |
+
},
|
| 19549 |
+
{
|
| 19550 |
+
"epoch": 0.08271661264805741,
|
| 19551 |
+
"grad_norm": 5.96875,
|
| 19552 |
+
"learning_rate": 0.00048633421378081406,
|
| 19553 |
+
"loss": 17.0937,
|
| 19554 |
+
"step": 55840
|
| 19555 |
+
},
|
| 19556 |
+
{
|
| 19557 |
+
"epoch": 0.0827462389419858,
|
| 19558 |
+
"grad_norm": 7.75,
|
| 19559 |
+
"learning_rate": 0.00048632927484580646,
|
| 19560 |
+
"loss": 17.0401,
|
| 19561 |
+
"step": 55860
|
| 19562 |
+
},
|
| 19563 |
+
{
|
| 19564 |
+
"epoch": 0.08277586523591418,
|
| 19565 |
+
"grad_norm": 6.84375,
|
| 19566 |
+
"learning_rate": 0.0004863243359107989,
|
| 19567 |
+
"loss": 17.0646,
|
| 19568 |
+
"step": 55880
|
| 19569 |
+
},
|
| 19570 |
+
{
|
| 19571 |
+
"epoch": 0.08280549152984257,
|
| 19572 |
+
"grad_norm": 6.3125,
|
| 19573 |
+
"learning_rate": 0.0004863193969757913,
|
| 19574 |
+
"loss": 17.0864,
|
| 19575 |
+
"step": 55900
|
| 19576 |
+
},
|
| 19577 |
+
{
|
| 19578 |
+
"epoch": 0.08283511782377095,
|
| 19579 |
+
"grad_norm": 7.90625,
|
| 19580 |
+
"learning_rate": 0.0004863144580407838,
|
| 19581 |
+
"loss": 17.0813,
|
| 19582 |
+
"step": 55920
|
| 19583 |
+
},
|
| 19584 |
+
{
|
| 19585 |
+
"epoch": 0.08286474411769934,
|
| 19586 |
+
"grad_norm": 8.0625,
|
| 19587 |
+
"learning_rate": 0.0004863095191057762,
|
| 19588 |
+
"loss": 17.0242,
|
| 19589 |
+
"step": 55940
|
| 19590 |
+
},
|
| 19591 |
+
{
|
| 19592 |
+
"epoch": 0.08289437041162773,
|
| 19593 |
+
"grad_norm": 6.78125,
|
| 19594 |
+
"learning_rate": 0.00048630458017076864,
|
| 19595 |
+
"loss": 17.1233,
|
| 19596 |
+
"step": 55960
|
| 19597 |
+
},
|
| 19598 |
+
{
|
| 19599 |
+
"epoch": 0.08292399670555611,
|
| 19600 |
+
"grad_norm": 6.3125,
|
| 19601 |
+
"learning_rate": 0.00048629964123576104,
|
| 19602 |
+
"loss": 17.0682,
|
| 19603 |
+
"step": 55980
|
| 19604 |
+
},
|
| 19605 |
+
{
|
| 19606 |
+
"epoch": 0.0829536229994845,
|
| 19607 |
+
"grad_norm": 7.375,
|
| 19608 |
+
"learning_rate": 0.0004862947023007535,
|
| 19609 |
+
"loss": 17.0628,
|
| 19610 |
+
"step": 56000
|
| 19611 |
}
|
| 19612 |
],
|
| 19613 |
"logging_steps": 20,
|
|
|
|
| 19627 |
"attributes": {}
|
| 19628 |
}
|
| 19629 |
},
|
| 19630 |
+
"total_flos": 4.117128426295394e+19,
|
| 19631 |
"train_batch_size": 48,
|
| 19632 |
"trial_name": null,
|
| 19633 |
"trial_params": null
|