Training in progress, step 2400, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 2066752
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e24a0310c4bf5f98acaaef3be18a2ca4d2a87e738c09732e426a40950ed1a048
|
| 3 |
size 2066752
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4121235
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cd1af5c861590ed1aa8a2c671dc6f425bb6ed5438470f7ad02d8e2b79717ef16
|
| 3 |
size 4121235
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14391
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:945f86d8abac1aa8354820af0171c56d0716798844a742f05ee2e852dae77534
|
| 3 |
size 14391
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1401
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d80fe92cd59dacf4b7d6a34254c209dfa9333b3830ad0abdd0afa76d827d8203
|
| 3 |
size 1401
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 100,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -16292,6 +16292,714 @@
|
|
| 16292 |
"eval_samples_per_second": 1.698,
|
| 16293 |
"eval_steps_per_second": 0.212,
|
| 16294 |
"step": 2300
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16295 |
}
|
| 16296 |
],
|
| 16297 |
"logging_steps": 1,
|
|
@@ -16311,7 +17019,7 @@
|
|
| 16311 |
"attributes": {}
|
| 16312 |
}
|
| 16313 |
},
|
| 16314 |
-
"total_flos":
|
| 16315 |
"train_batch_size": 1,
|
| 16316 |
"trial_name": null,
|
| 16317 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.10365379632029023,
|
| 6 |
"eval_steps": 100,
|
| 7 |
+
"global_step": 2400,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 16292 |
"eval_samples_per_second": 1.698,
|
| 16293 |
"eval_steps_per_second": 0.212,
|
| 16294 |
"step": 2300
|
| 16295 |
+
},
|
| 16296 |
+
{
|
| 16297 |
+
"epoch": 0.09937807722207825,
|
| 16298 |
+
"grad_norm": 0.9453125,
|
| 16299 |
+
"learning_rate": 0.0009933637689106113,
|
| 16300 |
+
"loss": 8.1346,
|
| 16301 |
+
"step": 2301
|
| 16302 |
+
},
|
| 16303 |
+
{
|
| 16304 |
+
"epoch": 0.09942126630387838,
|
| 16305 |
+
"grad_norm": 0.515625,
|
| 16306 |
+
"learning_rate": 0.0009933521675350812,
|
| 16307 |
+
"loss": 8.3205,
|
| 16308 |
+
"step": 2302
|
| 16309 |
+
},
|
| 16310 |
+
{
|
| 16311 |
+
"epoch": 0.0994644553856785,
|
| 16312 |
+
"grad_norm": 0.59765625,
|
| 16313 |
+
"learning_rate": 0.0009933405560955803,
|
| 16314 |
+
"loss": 8.3031,
|
| 16315 |
+
"step": 2303
|
| 16316 |
+
},
|
| 16317 |
+
{
|
| 16318 |
+
"epoch": 0.09950764446747862,
|
| 16319 |
+
"grad_norm": 0.447265625,
|
| 16320 |
+
"learning_rate": 0.0009933289345923457,
|
| 16321 |
+
"loss": 8.3805,
|
| 16322 |
+
"step": 2304
|
| 16323 |
+
},
|
| 16324 |
+
{
|
| 16325 |
+
"epoch": 0.09955083354927874,
|
| 16326 |
+
"grad_norm": 0.57421875,
|
| 16327 |
+
"learning_rate": 0.0009933173030256142,
|
| 16328 |
+
"loss": 8.191,
|
| 16329 |
+
"step": 2305
|
| 16330 |
+
},
|
| 16331 |
+
{
|
| 16332 |
+
"epoch": 0.09959402263107886,
|
| 16333 |
+
"grad_norm": 0.50390625,
|
| 16334 |
+
"learning_rate": 0.0009933056613956233,
|
| 16335 |
+
"loss": 8.427,
|
| 16336 |
+
"step": 2306
|
| 16337 |
+
},
|
| 16338 |
+
{
|
| 16339 |
+
"epoch": 0.09963721171287898,
|
| 16340 |
+
"grad_norm": 0.515625,
|
| 16341 |
+
"learning_rate": 0.0009932940097026105,
|
| 16342 |
+
"loss": 8.2676,
|
| 16343 |
+
"step": 2307
|
| 16344 |
+
},
|
| 16345 |
+
{
|
| 16346 |
+
"epoch": 0.0996804007946791,
|
| 16347 |
+
"grad_norm": 0.671875,
|
| 16348 |
+
"learning_rate": 0.0009932823479468131,
|
| 16349 |
+
"loss": 8.4411,
|
| 16350 |
+
"step": 2308
|
| 16351 |
+
},
|
| 16352 |
+
{
|
| 16353 |
+
"epoch": 0.09972358987647922,
|
| 16354 |
+
"grad_norm": 0.5234375,
|
| 16355 |
+
"learning_rate": 0.0009932706761284695,
|
| 16356 |
+
"loss": 8.1711,
|
| 16357 |
+
"step": 2309
|
| 16358 |
+
},
|
| 16359 |
+
{
|
| 16360 |
+
"epoch": 0.09976677895827935,
|
| 16361 |
+
"grad_norm": 0.51953125,
|
| 16362 |
+
"learning_rate": 0.0009932589942478174,
|
| 16363 |
+
"loss": 8.4505,
|
| 16364 |
+
"step": 2310
|
| 16365 |
+
},
|
| 16366 |
+
{
|
| 16367 |
+
"epoch": 0.09980996804007947,
|
| 16368 |
+
"grad_norm": 0.59765625,
|
| 16369 |
+
"learning_rate": 0.0009932473023050955,
|
| 16370 |
+
"loss": 8.1172,
|
| 16371 |
+
"step": 2311
|
| 16372 |
+
},
|
| 16373 |
+
{
|
| 16374 |
+
"epoch": 0.09985315712187959,
|
| 16375 |
+
"grad_norm": 0.75390625,
|
| 16376 |
+
"learning_rate": 0.0009932356003005418,
|
| 16377 |
+
"loss": 8.5325,
|
| 16378 |
+
"step": 2312
|
| 16379 |
+
},
|
| 16380 |
+
{
|
| 16381 |
+
"epoch": 0.09989634620367971,
|
| 16382 |
+
"grad_norm": 0.462890625,
|
| 16383 |
+
"learning_rate": 0.0009932238882343956,
|
| 16384 |
+
"loss": 8.4303,
|
| 16385 |
+
"step": 2313
|
| 16386 |
+
},
|
| 16387 |
+
{
|
| 16388 |
+
"epoch": 0.09993953528547983,
|
| 16389 |
+
"grad_norm": 0.50390625,
|
| 16390 |
+
"learning_rate": 0.0009932121661068952,
|
| 16391 |
+
"loss": 8.434,
|
| 16392 |
+
"step": 2314
|
| 16393 |
+
},
|
| 16394 |
+
{
|
| 16395 |
+
"epoch": 0.09998272436727995,
|
| 16396 |
+
"grad_norm": 0.64453125,
|
| 16397 |
+
"learning_rate": 0.0009932004339182803,
|
| 16398 |
+
"loss": 8.3212,
|
| 16399 |
+
"step": 2315
|
| 16400 |
+
},
|
| 16401 |
+
{
|
| 16402 |
+
"epoch": 0.10002591344908007,
|
| 16403 |
+
"grad_norm": 0.55859375,
|
| 16404 |
+
"learning_rate": 0.0009931886916687896,
|
| 16405 |
+
"loss": 8.3366,
|
| 16406 |
+
"step": 2316
|
| 16407 |
+
},
|
| 16408 |
+
{
|
| 16409 |
+
"epoch": 0.1000691025308802,
|
| 16410 |
+
"grad_norm": 0.4921875,
|
| 16411 |
+
"learning_rate": 0.0009931769393586632,
|
| 16412 |
+
"loss": 8.3998,
|
| 16413 |
+
"step": 2317
|
| 16414 |
+
},
|
| 16415 |
+
{
|
| 16416 |
+
"epoch": 0.10011229161268032,
|
| 16417 |
+
"grad_norm": 0.46484375,
|
| 16418 |
+
"learning_rate": 0.0009931651769881408,
|
| 16419 |
+
"loss": 8.4232,
|
| 16420 |
+
"step": 2318
|
| 16421 |
+
},
|
| 16422 |
+
{
|
| 16423 |
+
"epoch": 0.10015548069448044,
|
| 16424 |
+
"grad_norm": 1.0703125,
|
| 16425 |
+
"learning_rate": 0.0009931534045574616,
|
| 16426 |
+
"loss": 8.5407,
|
| 16427 |
+
"step": 2319
|
| 16428 |
+
},
|
| 16429 |
+
{
|
| 16430 |
+
"epoch": 0.10019866977628056,
|
| 16431 |
+
"grad_norm": 0.80859375,
|
| 16432 |
+
"learning_rate": 0.0009931416220668669,
|
| 16433 |
+
"loss": 8.2877,
|
| 16434 |
+
"step": 2320
|
| 16435 |
+
},
|
| 16436 |
+
{
|
| 16437 |
+
"epoch": 0.10024185885808068,
|
| 16438 |
+
"grad_norm": 0.4921875,
|
| 16439 |
+
"learning_rate": 0.0009931298295165962,
|
| 16440 |
+
"loss": 8.4644,
|
| 16441 |
+
"step": 2321
|
| 16442 |
+
},
|
| 16443 |
+
{
|
| 16444 |
+
"epoch": 0.1002850479398808,
|
| 16445 |
+
"grad_norm": 0.578125,
|
| 16446 |
+
"learning_rate": 0.0009931180269068904,
|
| 16447 |
+
"loss": 8.3041,
|
| 16448 |
+
"step": 2322
|
| 16449 |
+
},
|
| 16450 |
+
{
|
| 16451 |
+
"epoch": 0.10032823702168092,
|
| 16452 |
+
"grad_norm": 0.6640625,
|
| 16453 |
+
"learning_rate": 0.00099310621423799,
|
| 16454 |
+
"loss": 7.9393,
|
| 16455 |
+
"step": 2323
|
| 16456 |
+
},
|
| 16457 |
+
{
|
| 16458 |
+
"epoch": 0.10037142610348104,
|
| 16459 |
+
"grad_norm": 0.5078125,
|
| 16460 |
+
"learning_rate": 0.0009930943915101363,
|
| 16461 |
+
"loss": 8.2464,
|
| 16462 |
+
"step": 2324
|
| 16463 |
+
},
|
| 16464 |
+
{
|
| 16465 |
+
"epoch": 0.10041461518528116,
|
| 16466 |
+
"grad_norm": 0.70703125,
|
| 16467 |
+
"learning_rate": 0.0009930825587235704,
|
| 16468 |
+
"loss": 8.3575,
|
| 16469 |
+
"step": 2325
|
| 16470 |
+
},
|
| 16471 |
+
{
|
| 16472 |
+
"epoch": 0.10045780426708129,
|
| 16473 |
+
"grad_norm": 0.71875,
|
| 16474 |
+
"learning_rate": 0.0009930707158785335,
|
| 16475 |
+
"loss": 8.5688,
|
| 16476 |
+
"step": 2326
|
| 16477 |
+
},
|
| 16478 |
+
{
|
| 16479 |
+
"epoch": 0.1005009933488814,
|
| 16480 |
+
"grad_norm": 0.443359375,
|
| 16481 |
+
"learning_rate": 0.0009930588629752672,
|
| 16482 |
+
"loss": 8.2146,
|
| 16483 |
+
"step": 2327
|
| 16484 |
+
},
|
| 16485 |
+
{
|
| 16486 |
+
"epoch": 0.10054418243068153,
|
| 16487 |
+
"grad_norm": 0.58203125,
|
| 16488 |
+
"learning_rate": 0.0009930470000140135,
|
| 16489 |
+
"loss": 8.5153,
|
| 16490 |
+
"step": 2328
|
| 16491 |
+
},
|
| 16492 |
+
{
|
| 16493 |
+
"epoch": 0.10058737151248165,
|
| 16494 |
+
"grad_norm": 0.462890625,
|
| 16495 |
+
"learning_rate": 0.0009930351269950143,
|
| 16496 |
+
"loss": 8.2966,
|
| 16497 |
+
"step": 2329
|
| 16498 |
+
},
|
| 16499 |
+
{
|
| 16500 |
+
"epoch": 0.10063056059428177,
|
| 16501 |
+
"grad_norm": 0.54296875,
|
| 16502 |
+
"learning_rate": 0.0009930232439185117,
|
| 16503 |
+
"loss": 8.4384,
|
| 16504 |
+
"step": 2330
|
| 16505 |
+
},
|
| 16506 |
+
{
|
| 16507 |
+
"epoch": 0.10067374967608189,
|
| 16508 |
+
"grad_norm": 0.4765625,
|
| 16509 |
+
"learning_rate": 0.0009930113507847483,
|
| 16510 |
+
"loss": 8.4281,
|
| 16511 |
+
"step": 2331
|
| 16512 |
+
},
|
| 16513 |
+
{
|
| 16514 |
+
"epoch": 0.10071693875788201,
|
| 16515 |
+
"grad_norm": 0.6640625,
|
| 16516 |
+
"learning_rate": 0.0009929994475939665,
|
| 16517 |
+
"loss": 8.3741,
|
| 16518 |
+
"step": 2332
|
| 16519 |
+
},
|
| 16520 |
+
{
|
| 16521 |
+
"epoch": 0.10076012783968213,
|
| 16522 |
+
"grad_norm": 0.53515625,
|
| 16523 |
+
"learning_rate": 0.000992987534346409,
|
| 16524 |
+
"loss": 8.6786,
|
| 16525 |
+
"step": 2333
|
| 16526 |
+
},
|
| 16527 |
+
{
|
| 16528 |
+
"epoch": 0.10080331692148226,
|
| 16529 |
+
"grad_norm": 0.4453125,
|
| 16530 |
+
"learning_rate": 0.0009929756110423193,
|
| 16531 |
+
"loss": 8.4003,
|
| 16532 |
+
"step": 2334
|
| 16533 |
+
},
|
| 16534 |
+
{
|
| 16535 |
+
"epoch": 0.10084650600328238,
|
| 16536 |
+
"grad_norm": 0.5078125,
|
| 16537 |
+
"learning_rate": 0.0009929636776819402,
|
| 16538 |
+
"loss": 8.349,
|
| 16539 |
+
"step": 2335
|
| 16540 |
+
},
|
| 16541 |
+
{
|
| 16542 |
+
"epoch": 0.1008896950850825,
|
| 16543 |
+
"grad_norm": 0.5703125,
|
| 16544 |
+
"learning_rate": 0.0009929517342655155,
|
| 16545 |
+
"loss": 8.2853,
|
| 16546 |
+
"step": 2336
|
| 16547 |
+
},
|
| 16548 |
+
{
|
| 16549 |
+
"epoch": 0.1009328841668826,
|
| 16550 |
+
"grad_norm": 0.52734375,
|
| 16551 |
+
"learning_rate": 0.0009929397807932883,
|
| 16552 |
+
"loss": 8.4347,
|
| 16553 |
+
"step": 2337
|
| 16554 |
+
},
|
| 16555 |
+
{
|
| 16556 |
+
"epoch": 0.10097607324868273,
|
| 16557 |
+
"grad_norm": 0.5390625,
|
| 16558 |
+
"learning_rate": 0.0009929278172655029,
|
| 16559 |
+
"loss": 8.1882,
|
| 16560 |
+
"step": 2338
|
| 16561 |
+
},
|
| 16562 |
+
{
|
| 16563 |
+
"epoch": 0.10101926233048285,
|
| 16564 |
+
"grad_norm": 1.109375,
|
| 16565 |
+
"learning_rate": 0.0009929158436824033,
|
| 16566 |
+
"loss": 8.5276,
|
| 16567 |
+
"step": 2339
|
| 16568 |
+
},
|
| 16569 |
+
{
|
| 16570 |
+
"epoch": 0.10106245141228297,
|
| 16571 |
+
"grad_norm": 0.6015625,
|
| 16572 |
+
"learning_rate": 0.0009929038600442336,
|
| 16573 |
+
"loss": 8.2453,
|
| 16574 |
+
"step": 2340
|
| 16575 |
+
},
|
| 16576 |
+
{
|
| 16577 |
+
"epoch": 0.10110564049408309,
|
| 16578 |
+
"grad_norm": 0.80078125,
|
| 16579 |
+
"learning_rate": 0.0009928918663512382,
|
| 16580 |
+
"loss": 8.5133,
|
| 16581 |
+
"step": 2341
|
| 16582 |
+
},
|
| 16583 |
+
{
|
| 16584 |
+
"epoch": 0.10114882957588321,
|
| 16585 |
+
"grad_norm": 1.03125,
|
| 16586 |
+
"learning_rate": 0.000992879862603662,
|
| 16587 |
+
"loss": 8.1969,
|
| 16588 |
+
"step": 2342
|
| 16589 |
+
},
|
| 16590 |
+
{
|
| 16591 |
+
"epoch": 0.10119201865768333,
|
| 16592 |
+
"grad_norm": 0.466796875,
|
| 16593 |
+
"learning_rate": 0.0009928678488017497,
|
| 16594 |
+
"loss": 8.2737,
|
| 16595 |
+
"step": 2343
|
| 16596 |
+
},
|
| 16597 |
+
{
|
| 16598 |
+
"epoch": 0.10123520773948345,
|
| 16599 |
+
"grad_norm": 0.7734375,
|
| 16600 |
+
"learning_rate": 0.0009928558249457462,
|
| 16601 |
+
"loss": 8.333,
|
| 16602 |
+
"step": 2344
|
| 16603 |
+
},
|
| 16604 |
+
{
|
| 16605 |
+
"epoch": 0.10127839682128358,
|
| 16606 |
+
"grad_norm": 0.439453125,
|
| 16607 |
+
"learning_rate": 0.0009928437910358971,
|
| 16608 |
+
"loss": 8.2923,
|
| 16609 |
+
"step": 2345
|
| 16610 |
+
},
|
| 16611 |
+
{
|
| 16612 |
+
"epoch": 0.1013215859030837,
|
| 16613 |
+
"grad_norm": 0.7421875,
|
| 16614 |
+
"learning_rate": 0.000992831747072448,
|
| 16615 |
+
"loss": 8.5941,
|
| 16616 |
+
"step": 2346
|
| 16617 |
+
},
|
| 16618 |
+
{
|
| 16619 |
+
"epoch": 0.10136477498488382,
|
| 16620 |
+
"grad_norm": 0.62109375,
|
| 16621 |
+
"learning_rate": 0.0009928196930556442,
|
| 16622 |
+
"loss": 8.2137,
|
| 16623 |
+
"step": 2347
|
| 16624 |
+
},
|
| 16625 |
+
{
|
| 16626 |
+
"epoch": 0.10140796406668394,
|
| 16627 |
+
"grad_norm": 0.5546875,
|
| 16628 |
+
"learning_rate": 0.0009928076289857318,
|
| 16629 |
+
"loss": 8.5716,
|
| 16630 |
+
"step": 2348
|
| 16631 |
+
},
|
| 16632 |
+
{
|
| 16633 |
+
"epoch": 0.10145115314848406,
|
| 16634 |
+
"grad_norm": 0.8203125,
|
| 16635 |
+
"learning_rate": 0.0009927955548629567,
|
| 16636 |
+
"loss": 8.4401,
|
| 16637 |
+
"step": 2349
|
| 16638 |
+
},
|
| 16639 |
+
{
|
| 16640 |
+
"epoch": 0.10149434223028418,
|
| 16641 |
+
"grad_norm": 0.7734375,
|
| 16642 |
+
"learning_rate": 0.0009927834706875654,
|
| 16643 |
+
"loss": 8.3506,
|
| 16644 |
+
"step": 2350
|
| 16645 |
+
},
|
| 16646 |
+
{
|
| 16647 |
+
"epoch": 0.1015375313120843,
|
| 16648 |
+
"grad_norm": 1.859375,
|
| 16649 |
+
"learning_rate": 0.0009927713764598042,
|
| 16650 |
+
"loss": 8.1892,
|
| 16651 |
+
"step": 2351
|
| 16652 |
+
},
|
| 16653 |
+
{
|
| 16654 |
+
"epoch": 0.10158072039388442,
|
| 16655 |
+
"grad_norm": 0.71484375,
|
| 16656 |
+
"learning_rate": 0.0009927592721799203,
|
| 16657 |
+
"loss": 8.5624,
|
| 16658 |
+
"step": 2352
|
| 16659 |
+
},
|
| 16660 |
+
{
|
| 16661 |
+
"epoch": 0.10162390947568455,
|
| 16662 |
+
"grad_norm": 0.62109375,
|
| 16663 |
+
"learning_rate": 0.0009927471578481599,
|
| 16664 |
+
"loss": 8.0008,
|
| 16665 |
+
"step": 2353
|
| 16666 |
+
},
|
| 16667 |
+
{
|
| 16668 |
+
"epoch": 0.10166709855748467,
|
| 16669 |
+
"grad_norm": 0.73828125,
|
| 16670 |
+
"learning_rate": 0.000992735033464771,
|
| 16671 |
+
"loss": 8.2854,
|
| 16672 |
+
"step": 2354
|
| 16673 |
+
},
|
| 16674 |
+
{
|
| 16675 |
+
"epoch": 0.10171028763928479,
|
| 16676 |
+
"grad_norm": 0.64453125,
|
| 16677 |
+
"learning_rate": 0.0009927228990299999,
|
| 16678 |
+
"loss": 8.2719,
|
| 16679 |
+
"step": 2355
|
| 16680 |
+
},
|
| 16681 |
+
{
|
| 16682 |
+
"epoch": 0.10175347672108491,
|
| 16683 |
+
"grad_norm": 0.484375,
|
| 16684 |
+
"learning_rate": 0.000992710754544095,
|
| 16685 |
+
"loss": 8.3854,
|
| 16686 |
+
"step": 2356
|
| 16687 |
+
},
|
| 16688 |
+
{
|
| 16689 |
+
"epoch": 0.10179666580288503,
|
| 16690 |
+
"grad_norm": 0.91015625,
|
| 16691 |
+
"learning_rate": 0.0009926986000073036,
|
| 16692 |
+
"loss": 8.4913,
|
| 16693 |
+
"step": 2357
|
| 16694 |
+
},
|
| 16695 |
+
{
|
| 16696 |
+
"epoch": 0.10183985488468515,
|
| 16697 |
+
"grad_norm": 0.62109375,
|
| 16698 |
+
"learning_rate": 0.0009926864354198738,
|
| 16699 |
+
"loss": 8.4161,
|
| 16700 |
+
"step": 2358
|
| 16701 |
+
},
|
| 16702 |
+
{
|
| 16703 |
+
"epoch": 0.10188304396648527,
|
| 16704 |
+
"grad_norm": 0.62109375,
|
| 16705 |
+
"learning_rate": 0.0009926742607820535,
|
| 16706 |
+
"loss": 8.365,
|
| 16707 |
+
"step": 2359
|
| 16708 |
+
},
|
| 16709 |
+
{
|
| 16710 |
+
"epoch": 0.1019262330482854,
|
| 16711 |
+
"grad_norm": 0.51953125,
|
| 16712 |
+
"learning_rate": 0.0009926620760940914,
|
| 16713 |
+
"loss": 8.2836,
|
| 16714 |
+
"step": 2360
|
| 16715 |
+
},
|
| 16716 |
+
{
|
| 16717 |
+
"epoch": 0.10196942213008552,
|
| 16718 |
+
"grad_norm": 0.63671875,
|
| 16719 |
+
"learning_rate": 0.0009926498813562358,
|
| 16720 |
+
"loss": 8.7514,
|
| 16721 |
+
"step": 2361
|
| 16722 |
+
},
|
| 16723 |
+
{
|
| 16724 |
+
"epoch": 0.10201261121188564,
|
| 16725 |
+
"grad_norm": 0.54296875,
|
| 16726 |
+
"learning_rate": 0.0009926376765687357,
|
| 16727 |
+
"loss": 8.4432,
|
| 16728 |
+
"step": 2362
|
| 16729 |
+
},
|
| 16730 |
+
{
|
| 16731 |
+
"epoch": 0.10205580029368576,
|
| 16732 |
+
"grad_norm": 0.6875,
|
| 16733 |
+
"learning_rate": 0.0009926254617318398,
|
| 16734 |
+
"loss": 8.3274,
|
| 16735 |
+
"step": 2363
|
| 16736 |
+
},
|
| 16737 |
+
{
|
| 16738 |
+
"epoch": 0.10209898937548588,
|
| 16739 |
+
"grad_norm": 0.55859375,
|
| 16740 |
+
"learning_rate": 0.0009926132368457974,
|
| 16741 |
+
"loss": 8.3462,
|
| 16742 |
+
"step": 2364
|
| 16743 |
+
},
|
| 16744 |
+
{
|
| 16745 |
+
"epoch": 0.102142178457286,
|
| 16746 |
+
"grad_norm": 0.6953125,
|
| 16747 |
+
"learning_rate": 0.0009926010019108578,
|
| 16748 |
+
"loss": 8.3675,
|
| 16749 |
+
"step": 2365
|
| 16750 |
+
},
|
| 16751 |
+
{
|
| 16752 |
+
"epoch": 0.10218536753908612,
|
| 16753 |
+
"grad_norm": 0.65625,
|
| 16754 |
+
"learning_rate": 0.0009925887569272708,
|
| 16755 |
+
"loss": 8.2461,
|
| 16756 |
+
"step": 2366
|
| 16757 |
+
},
|
| 16758 |
+
{
|
| 16759 |
+
"epoch": 0.10222855662088624,
|
| 16760 |
+
"grad_norm": 0.53515625,
|
| 16761 |
+
"learning_rate": 0.0009925765018952862,
|
| 16762 |
+
"loss": 8.1833,
|
| 16763 |
+
"step": 2367
|
| 16764 |
+
},
|
| 16765 |
+
{
|
| 16766 |
+
"epoch": 0.10227174570268636,
|
| 16767 |
+
"grad_norm": 0.6953125,
|
| 16768 |
+
"learning_rate": 0.0009925642368151535,
|
| 16769 |
+
"loss": 8.0176,
|
| 16770 |
+
"step": 2368
|
| 16771 |
+
},
|
| 16772 |
+
{
|
| 16773 |
+
"epoch": 0.10231493478448649,
|
| 16774 |
+
"grad_norm": 0.5234375,
|
| 16775 |
+
"learning_rate": 0.0009925519616871235,
|
| 16776 |
+
"loss": 8.4266,
|
| 16777 |
+
"step": 2369
|
| 16778 |
+
},
|
| 16779 |
+
{
|
| 16780 |
+
"epoch": 0.10235812386628661,
|
| 16781 |
+
"grad_norm": 0.404296875,
|
| 16782 |
+
"learning_rate": 0.0009925396765114462,
|
| 16783 |
+
"loss": 8.3391,
|
| 16784 |
+
"step": 2370
|
| 16785 |
+
},
|
| 16786 |
+
{
|
| 16787 |
+
"epoch": 0.10240131294808673,
|
| 16788 |
+
"grad_norm": 0.45703125,
|
| 16789 |
+
"learning_rate": 0.0009925273812883724,
|
| 16790 |
+
"loss": 8.6259,
|
| 16791 |
+
"step": 2371
|
| 16792 |
+
},
|
| 16793 |
+
{
|
| 16794 |
+
"epoch": 0.10244450202988685,
|
| 16795 |
+
"grad_norm": 0.609375,
|
| 16796 |
+
"learning_rate": 0.000992515076018153,
|
| 16797 |
+
"loss": 8.2038,
|
| 16798 |
+
"step": 2372
|
| 16799 |
+
},
|
| 16800 |
+
{
|
| 16801 |
+
"epoch": 0.10248769111168697,
|
| 16802 |
+
"grad_norm": 0.404296875,
|
| 16803 |
+
"learning_rate": 0.0009925027607010385,
|
| 16804 |
+
"loss": 8.5486,
|
| 16805 |
+
"step": 2373
|
| 16806 |
+
},
|
| 16807 |
+
{
|
| 16808 |
+
"epoch": 0.10253088019348709,
|
| 16809 |
+
"grad_norm": 0.5,
|
| 16810 |
+
"learning_rate": 0.0009924904353372807,
|
| 16811 |
+
"loss": 8.4328,
|
| 16812 |
+
"step": 2374
|
| 16813 |
+
},
|
| 16814 |
+
{
|
| 16815 |
+
"epoch": 0.10257406927528721,
|
| 16816 |
+
"grad_norm": 0.4296875,
|
| 16817 |
+
"learning_rate": 0.000992478099927131,
|
| 16818 |
+
"loss": 8.3761,
|
| 16819 |
+
"step": 2375
|
| 16820 |
+
},
|
| 16821 |
+
{
|
| 16822 |
+
"epoch": 0.10261725835708733,
|
| 16823 |
+
"grad_norm": 0.515625,
|
| 16824 |
+
"learning_rate": 0.0009924657544708407,
|
| 16825 |
+
"loss": 8.4608,
|
| 16826 |
+
"step": 2376
|
| 16827 |
+
},
|
| 16828 |
+
{
|
| 16829 |
+
"epoch": 0.10266044743888746,
|
| 16830 |
+
"grad_norm": 0.7578125,
|
| 16831 |
+
"learning_rate": 0.0009924533989686616,
|
| 16832 |
+
"loss": 8.3795,
|
| 16833 |
+
"step": 2377
|
| 16834 |
+
},
|
| 16835 |
+
{
|
| 16836 |
+
"epoch": 0.10270363652068756,
|
| 16837 |
+
"grad_norm": 0.51171875,
|
| 16838 |
+
"learning_rate": 0.0009924410334208463,
|
| 16839 |
+
"loss": 8.2331,
|
| 16840 |
+
"step": 2378
|
| 16841 |
+
},
|
| 16842 |
+
{
|
| 16843 |
+
"epoch": 0.10274682560248768,
|
| 16844 |
+
"grad_norm": 0.55078125,
|
| 16845 |
+
"learning_rate": 0.0009924286578276464,
|
| 16846 |
+
"loss": 8.6885,
|
| 16847 |
+
"step": 2379
|
| 16848 |
+
},
|
| 16849 |
+
{
|
| 16850 |
+
"epoch": 0.1027900146842878,
|
| 16851 |
+
"grad_norm": 0.50390625,
|
| 16852 |
+
"learning_rate": 0.0009924162721893148,
|
| 16853 |
+
"loss": 8.3229,
|
| 16854 |
+
"step": 2380
|
| 16855 |
+
},
|
| 16856 |
+
{
|
| 16857 |
+
"epoch": 0.10283320376608793,
|
| 16858 |
+
"grad_norm": 0.494140625,
|
| 16859 |
+
"learning_rate": 0.000992403876506104,
|
| 16860 |
+
"loss": 8.5207,
|
| 16861 |
+
"step": 2381
|
| 16862 |
+
},
|
| 16863 |
+
{
|
| 16864 |
+
"epoch": 0.10287639284788805,
|
| 16865 |
+
"grad_norm": 0.5390625,
|
| 16866 |
+
"learning_rate": 0.0009923914707782669,
|
| 16867 |
+
"loss": 8.5089,
|
| 16868 |
+
"step": 2382
|
| 16869 |
+
},
|
| 16870 |
+
{
|
| 16871 |
+
"epoch": 0.10291958192968817,
|
| 16872 |
+
"grad_norm": 0.49609375,
|
| 16873 |
+
"learning_rate": 0.0009923790550060564,
|
| 16874 |
+
"loss": 8.4575,
|
| 16875 |
+
"step": 2383
|
| 16876 |
+
},
|
| 16877 |
+
{
|
| 16878 |
+
"epoch": 0.10296277101148829,
|
| 16879 |
+
"grad_norm": 0.58203125,
|
| 16880 |
+
"learning_rate": 0.0009923666291897259,
|
| 16881 |
+
"loss": 8.3337,
|
| 16882 |
+
"step": 2384
|
| 16883 |
+
},
|
| 16884 |
+
{
|
| 16885 |
+
"epoch": 0.10300596009328841,
|
| 16886 |
+
"grad_norm": 0.41796875,
|
| 16887 |
+
"learning_rate": 0.0009923541933295288,
|
| 16888 |
+
"loss": 8.537,
|
| 16889 |
+
"step": 2385
|
| 16890 |
+
},
|
| 16891 |
+
{
|
| 16892 |
+
"epoch": 0.10304914917508853,
|
| 16893 |
+
"grad_norm": 0.486328125,
|
| 16894 |
+
"learning_rate": 0.000992341747425719,
|
| 16895 |
+
"loss": 8.3868,
|
| 16896 |
+
"step": 2386
|
| 16897 |
+
},
|
| 16898 |
+
{
|
| 16899 |
+
"epoch": 0.10309233825688865,
|
| 16900 |
+
"grad_norm": 0.458984375,
|
| 16901 |
+
"learning_rate": 0.0009923292914785502,
|
| 16902 |
+
"loss": 8.5046,
|
| 16903 |
+
"step": 2387
|
| 16904 |
+
},
|
| 16905 |
+
{
|
| 16906 |
+
"epoch": 0.10313552733868878,
|
| 16907 |
+
"grad_norm": 0.55859375,
|
| 16908 |
+
"learning_rate": 0.0009923168254882763,
|
| 16909 |
+
"loss": 8.3372,
|
| 16910 |
+
"step": 2388
|
| 16911 |
+
},
|
| 16912 |
+
{
|
| 16913 |
+
"epoch": 0.1031787164204889,
|
| 16914 |
+
"grad_norm": 0.9140625,
|
| 16915 |
+
"learning_rate": 0.0009923043494551522,
|
| 16916 |
+
"loss": 8.3775,
|
| 16917 |
+
"step": 2389
|
| 16918 |
+
},
|
| 16919 |
+
{
|
| 16920 |
+
"epoch": 0.10322190550228902,
|
| 16921 |
+
"grad_norm": 0.9921875,
|
| 16922 |
+
"learning_rate": 0.0009922918633794317,
|
| 16923 |
+
"loss": 8.665,
|
| 16924 |
+
"step": 2390
|
| 16925 |
+
},
|
| 16926 |
+
{
|
| 16927 |
+
"epoch": 0.10326509458408914,
|
| 16928 |
+
"grad_norm": 0.470703125,
|
| 16929 |
+
"learning_rate": 0.00099227936726137,
|
| 16930 |
+
"loss": 8.502,
|
| 16931 |
+
"step": 2391
|
| 16932 |
+
},
|
| 16933 |
+
{
|
| 16934 |
+
"epoch": 0.10330828366588926,
|
| 16935 |
+
"grad_norm": 0.9453125,
|
| 16936 |
+
"learning_rate": 0.0009922668611012217,
|
| 16937 |
+
"loss": 8.6291,
|
| 16938 |
+
"step": 2392
|
| 16939 |
+
},
|
| 16940 |
+
{
|
| 16941 |
+
"epoch": 0.10335147274768938,
|
| 16942 |
+
"grad_norm": 0.6953125,
|
| 16943 |
+
"learning_rate": 0.0009922543448992423,
|
| 16944 |
+
"loss": 8.2589,
|
| 16945 |
+
"step": 2393
|
| 16946 |
+
},
|
| 16947 |
+
{
|
| 16948 |
+
"epoch": 0.1033946618294895,
|
| 16949 |
+
"grad_norm": 1.2421875,
|
| 16950 |
+
"learning_rate": 0.0009922418186556867,
|
| 16951 |
+
"loss": 7.6237,
|
| 16952 |
+
"step": 2394
|
| 16953 |
+
},
|
| 16954 |
+
{
|
| 16955 |
+
"epoch": 0.10343785091128962,
|
| 16956 |
+
"grad_norm": 0.91796875,
|
| 16957 |
+
"learning_rate": 0.0009922292823708106,
|
| 16958 |
+
"loss": 8.5899,
|
| 16959 |
+
"step": 2395
|
| 16960 |
+
},
|
| 16961 |
+
{
|
| 16962 |
+
"epoch": 0.10348103999308975,
|
| 16963 |
+
"grad_norm": 0.55859375,
|
| 16964 |
+
"learning_rate": 0.0009922167360448698,
|
| 16965 |
+
"loss": 8.184,
|
| 16966 |
+
"step": 2396
|
| 16967 |
+
},
|
| 16968 |
+
{
|
| 16969 |
+
"epoch": 0.10352422907488987,
|
| 16970 |
+
"grad_norm": 0.9921875,
|
| 16971 |
+
"learning_rate": 0.00099220417967812,
|
| 16972 |
+
"loss": 8.4565,
|
| 16973 |
+
"step": 2397
|
| 16974 |
+
},
|
| 16975 |
+
{
|
| 16976 |
+
"epoch": 0.10356741815668999,
|
| 16977 |
+
"grad_norm": 0.63671875,
|
| 16978 |
+
"learning_rate": 0.0009921916132708177,
|
| 16979 |
+
"loss": 8.3024,
|
| 16980 |
+
"step": 2398
|
| 16981 |
+
},
|
| 16982 |
+
{
|
| 16983 |
+
"epoch": 0.10361060723849011,
|
| 16984 |
+
"grad_norm": 0.67578125,
|
| 16985 |
+
"learning_rate": 0.0009921790368232189,
|
| 16986 |
+
"loss": 8.7929,
|
| 16987 |
+
"step": 2399
|
| 16988 |
+
},
|
| 16989 |
+
{
|
| 16990 |
+
"epoch": 0.10365379632029023,
|
| 16991 |
+
"grad_norm": 0.61328125,
|
| 16992 |
+
"learning_rate": 0.0009921664503355803,
|
| 16993 |
+
"loss": 8.1994,
|
| 16994 |
+
"step": 2400
|
| 16995 |
+
},
|
| 16996 |
+
{
|
| 16997 |
+
"epoch": 0.10365379632029023,
|
| 16998 |
+
"eval_loss": 8.382716178894043,
|
| 16999 |
+
"eval_runtime": 13.8499,
|
| 17000 |
+
"eval_samples_per_second": 1.733,
|
| 17001 |
+
"eval_steps_per_second": 0.217,
|
| 17002 |
+
"step": 2400
|
| 17003 |
}
|
| 17004 |
],
|
| 17005 |
"logging_steps": 1,
|
|
|
|
| 17019 |
"attributes": {}
|
| 17020 |
}
|
| 17021 |
},
|
| 17022 |
+
"total_flos": 7671722803200.0,
|
| 17023 |
"train_batch_size": 1,
|
| 17024 |
"trial_name": null,
|
| 17025 |
"trial_params": null
|