Training in progress, step 76000, checkpoint
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +353 -3

last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:cc06a7f089f926af24a4dafd2fc5c68a00957b0501ae37664b3613577e08b3af
 size 304481530
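
Each checkpoint file in this commit is tracked with Git LFS, so the repository stores only the three-line pointer shown above (version, oid, size); this commit swaps the sha256 oid while the blob size stays the same. A minimal sketch for verifying a downloaded blob against its pointer, assuming hypothetical local paths (the pointer layout itself is exactly the three key-value lines in the diff):

import hashlib
import os

def read_lfs_pointer(path):
    # A Git LFS pointer is three "key value" lines: version, oid, size.
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

def sha256_of(path, chunk_size=1 << 20):
    # Stream in 1 MiB chunks so a ~400 MB pytorch_model.bin is never
    # loaded into memory all at once.
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

pointer = read_lfs_pointer("optimizer.pt.pointer")  # hypothetical pointer copy
blob = "downloads/optimizer.pt"                     # hypothetical resolved blob
assert pointer["oid"] == "sha256:" + sha256_of(blob)
assert int(pointer["size"]) == os.path.getsize(blob)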

last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:9513e34a098ccfc4d0eeceb95099c5472b4ff0a71cffb25d876aad974cab2486
 size 402029570

last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:c3691082114682896d0f28ee5b4c8f41d4639d4efe6c895c755146048ab7c832
 size 14960

last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:6c20256e223141e6700101ae515de5a6287d380eaea8a4346e1c56536ce67dcb
 size 14960

last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:a5392970d44236e7a431111e07b6640793728800da16c52d461401ef3040338a
 size 14960

last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:756f7cda01c1bba0353fe356cfd74ccb32f9626ff6708219372ea8a4c1ba35dc
 size 14960

last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:178aedfc2920966f379dc01376957ddc00b6df6f84cf67e8abd741361412b63d
 size 1064
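
The four rng_state_*.pth files above, one per rank, indicate a four-process data-parallel run: each rank checkpoints its own random-generator state so dataloader shuffling and dropout resume deterministically. A quick way to peek inside one, assuming (as is typical for Trainer checkpoints, but not confirmed by this diff) that it is a small pickled dict of Python/NumPy/CPU/CUDA generator states:

import torch

# weights_only=False because the file holds arbitrary pickled RNG state,
# not just tensors; only do this with checkpoints you trust.
state = torch.load("last-checkpoint/rng_state_0.pth",
                   map_location="cpu", weights_only=False)
if isinstance(state, dict):
    for key, value in state.items():  # exact keys are an assumption
        print(key, type(value).__name__)
else:
    print(type(state).__name__)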

last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.11257991692787182,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 76000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -26258,6 +26258,356 @@
       "learning_rate": 0.00048160271404356535,
       "loss": 16.4454,
       "step": 75000
+    },
+    {
+      "epoch": 0.11112822852538085,
+      "grad_norm": 5.6875,
+      "learning_rate": 0.0004815977751085578,
+      "loss": 16.438,
+      "step": 75020
+    },
+    {
+      "epoch": 0.11115785481930923,
+      "grad_norm": 6.6875,
+      "learning_rate": 0.0004815928361735502,
+      "loss": 16.4092,
+      "step": 75040
+    },
+    {
+      "epoch": 0.11118748111323762,
+      "grad_norm": 6.40625,
+      "learning_rate": 0.0004815878972385427,
+      "loss": 16.364,
+      "step": 75060
+    },
+    {
+      "epoch": 0.111217107407166,
+      "grad_norm": 6.90625,
+      "learning_rate": 0.0004815829583035351,
+      "loss": 16.3725,
+      "step": 75080
+    },
+    {
+      "epoch": 0.11124673370109439,
+      "grad_norm": 7.28125,
+      "learning_rate": 0.00048157801936852754,
+      "loss": 16.4758,
+      "step": 75100
+    },
+    {
+      "epoch": 0.11127635999502278,
+      "grad_norm": 7.03125,
+      "learning_rate": 0.00048157308043352,
+      "loss": 16.3516,
+      "step": 75120
+    },
+    {
+      "epoch": 0.11130598628895116,
+      "grad_norm": 7.375,
+      "learning_rate": 0.00048156814149851243,
+      "loss": 16.4029,
+      "step": 75140
+    },
+    {
+      "epoch": 0.11133561258287955,
+      "grad_norm": 6.40625,
+      "learning_rate": 0.0004815632025635048,
+      "loss": 16.4283,
+      "step": 75160
+    },
+    {
+      "epoch": 0.11136523887680795,
+      "grad_norm": 6.40625,
+      "learning_rate": 0.0004815582636284973,
+      "loss": 16.3905,
+      "step": 75180
+    },
+    {
+      "epoch": 0.11139486517073634,
+      "grad_norm": 7.875,
+      "learning_rate": 0.0004815533246934897,
+      "loss": 16.3731,
+      "step": 75200
+    },
+    {
+      "epoch": 0.11142449146466472,
+      "grad_norm": 6.1875,
+      "learning_rate": 0.00048154838575848217,
+      "loss": 16.4243,
+      "step": 75220
+    },
+    {
+      "epoch": 0.11145411775859311,
+      "grad_norm": 7.28125,
+      "learning_rate": 0.00048154344682347456,
+      "loss": 16.3272,
+      "step": 75240
+    },
+    {
+      "epoch": 0.1114837440525215,
+      "grad_norm": 7.125,
+      "learning_rate": 0.00048153850788846696,
+      "loss": 16.3074,
+      "step": 75260
+    },
+    {
+      "epoch": 0.11151337034644988,
+      "grad_norm": 7.78125,
+      "learning_rate": 0.00048153356895345946,
+      "loss": 16.4066,
+      "step": 75280
+    },
+    {
+      "epoch": 0.11154299664037827,
+      "grad_norm": 7.125,
+      "learning_rate": 0.00048152863001845185,
+      "loss": 16.3641,
+      "step": 75300
+    },
+    {
+      "epoch": 0.11157262293430666,
+      "grad_norm": 7.0625,
+      "learning_rate": 0.0004815236910834443,
+      "loss": 16.3674,
+      "step": 75320
+    },
+    {
+      "epoch": 0.11160224922823504,
+      "grad_norm": 7.03125,
+      "learning_rate": 0.0004815187521484367,
+      "loss": 16.3299,
+      "step": 75340
+    },
+    {
+      "epoch": 0.11163187552216343,
+      "grad_norm": 6.65625,
+      "learning_rate": 0.0004815138132134292,
+      "loss": 16.4177,
+      "step": 75360
+    },
+    {
+      "epoch": 0.11166150181609182,
+      "grad_norm": 7.1875,
+      "learning_rate": 0.0004815088742784216,
+      "loss": 16.3523,
+      "step": 75380
+    },
+    {
+      "epoch": 0.1116911281100202,
+      "grad_norm": 7.125,
+      "learning_rate": 0.00048150393534341404,
+      "loss": 16.3449,
+      "step": 75400
+    },
+    {
+      "epoch": 0.11172075440394859,
+      "grad_norm": 6.34375,
+      "learning_rate": 0.0004814989964084065,
+      "loss": 16.3556,
+      "step": 75420
+    },
+    {
+      "epoch": 0.11175038069787697,
+      "grad_norm": 7.28125,
+      "learning_rate": 0.00048149405747339893,
+      "loss": 16.4188,
+      "step": 75440
+    },
+    {
+      "epoch": 0.11178000699180536,
+      "grad_norm": 6.65625,
+      "learning_rate": 0.0004814891185383913,
+      "loss": 16.3877,
+      "step": 75460
+    },
+    {
+      "epoch": 0.11180963328573375,
+      "grad_norm": 6.71875,
+      "learning_rate": 0.0004814841796033838,
+      "loss": 16.3696,
+      "step": 75480
+    },
+    {
+      "epoch": 0.11183925957966215,
+      "grad_norm": 6.8125,
+      "learning_rate": 0.0004814792406683762,
+      "loss": 16.3543,
+      "step": 75500
+    },
+    {
+      "epoch": 0.11186888587359053,
+      "grad_norm": 7.21875,
+      "learning_rate": 0.00048147430173336867,
+      "loss": 16.4576,
+      "step": 75520
+    },
+    {
+      "epoch": 0.11189851216751892,
+      "grad_norm": 6.625,
+      "learning_rate": 0.00048146936279836106,
+      "loss": 16.3673,
+      "step": 75540
+    },
+    {
+      "epoch": 0.11192813846144731,
+      "grad_norm": 7.25,
+      "learning_rate": 0.0004814644238633535,
+      "loss": 16.4263,
+      "step": 75560
+    },
+    {
+      "epoch": 0.1119577647553757,
+      "grad_norm": 6.75,
+      "learning_rate": 0.00048145948492834596,
+      "loss": 16.3437,
+      "step": 75580
+    },
+    {
+      "epoch": 0.11198739104930408,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.00048145454599333835,
+      "loss": 16.3618,
+      "step": 75600
+    },
+    {
+      "epoch": 0.11201701734323247,
+      "grad_norm": 6.71875,
+      "learning_rate": 0.0004814496070583308,
+      "loss": 16.3418,
+      "step": 75620
+    },
+    {
+      "epoch": 0.11204664363716085,
+      "grad_norm": 6.90625,
+      "learning_rate": 0.0004814446681233232,
+      "loss": 16.3916,
+      "step": 75640
+    },
+    {
+      "epoch": 0.11207626993108924,
+      "grad_norm": 7.21875,
+      "learning_rate": 0.0004814397291883157,
+      "loss": 16.3521,
+      "step": 75660
+    },
+    {
+      "epoch": 0.11210589622501763,
+      "grad_norm": 6.84375,
+      "learning_rate": 0.0004814347902533081,
+      "loss": 16.3464,
+      "step": 75680
+    },
+    {
+      "epoch": 0.11213552251894601,
+      "grad_norm": 6.40625,
+      "learning_rate": 0.00048142985131830054,
+      "loss": 16.3965,
+      "step": 75700
+    },
+    {
+      "epoch": 0.1121651488128744,
+      "grad_norm": 6.53125,
+      "learning_rate": 0.000481424912383293,
+      "loss": 16.379,
+      "step": 75720
+    },
+    {
+      "epoch": 0.11219477510680279,
+      "grad_norm": 6.375,
+      "learning_rate": 0.00048141997344828543,
+      "loss": 16.3858,
+      "step": 75740
+    },
+    {
+      "epoch": 0.11222440140073117,
+      "grad_norm": 6.75,
+      "learning_rate": 0.0004814150345132778,
+      "loss": 16.3445,
+      "step": 75760
+    },
+    {
+      "epoch": 0.11225402769465956,
+      "grad_norm": 6.34375,
+      "learning_rate": 0.0004814100955782703,
+      "loss": 16.2839,
+      "step": 75780
+    },
+    {
+      "epoch": 0.11228365398858794,
+      "grad_norm": 6.15625,
+      "learning_rate": 0.0004814051566432627,
+      "loss": 16.3684,
+      "step": 75800
+    },
+    {
+      "epoch": 0.11231328028251635,
+      "grad_norm": 7.375,
+      "learning_rate": 0.00048140021770825517,
+      "loss": 16.3344,
+      "step": 75820
+    },
+    {
+      "epoch": 0.11234290657644473,
+      "grad_norm": 7.0625,
+      "learning_rate": 0.00048139527877324756,
+      "loss": 16.3251,
+      "step": 75840
+    },
+    {
+      "epoch": 0.11237253287037312,
+      "grad_norm": 7.1875,
+      "learning_rate": 0.00048139033983824,
+      "loss": 16.3266,
+      "step": 75860
+    },
+    {
+      "epoch": 0.1124021591643015,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00048138540090323246,
+      "loss": 16.3339,
+      "step": 75880
+    },
+    {
+      "epoch": 0.11243178545822989,
+      "grad_norm": 7.59375,
+      "learning_rate": 0.0004813804619682249,
+      "loss": 16.3593,
+      "step": 75900
+    },
+    {
+      "epoch": 0.11246141175215828,
+      "grad_norm": 6.1875,
+      "learning_rate": 0.0004813755230332173,
+      "loss": 16.3293,
+      "step": 75920
+    },
+    {
+      "epoch": 0.11249103804608666,
+      "grad_norm": 7.3125,
+      "learning_rate": 0.0004813705840982097,
+      "loss": 16.3553,
+      "step": 75940
+    },
+    {
+      "epoch": 0.11252066434001505,
+      "grad_norm": 6.9375,
+      "learning_rate": 0.0004813656451632022,
+      "loss": 16.375,
+      "step": 75960
+    },
+    {
+      "epoch": 0.11255029063394344,
+      "grad_norm": 6.65625,
+      "learning_rate": 0.0004813607062281946,
+      "loss": 16.3077,
+      "step": 75980
+    },
+    {
+      "epoch": 0.11257991692787182,
+      "grad_norm": 7.25,
+      "learning_rate": 0.00048135576729318704,
+      "loss": 16.3442,
+      "step": 76000
     }
   ],
   "logging_steps": 20,
@@ -26277,7 +26627,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.
+  "total_flos": 5.587853722915124e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null
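
The trainer_state.json changes above are regular enough to mine directly: log_history gains one record per logging_steps (20) optimizer steps, and the updated epoch/global_step header pair pins down the epoch length. A short sketch for pulling the curves out of the file, assuming it sits at the path used in this commit:

import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

# Keep only the periodic training logs (evaluation records would lack "loss").
history = [e for e in state["log_history"] if "loss" in e]
steps = [e["step"] for e in history]
losses = [e["loss"] for e in history]
lrs = [e["learning_rate"] for e in history]
print(f"{len(history)} records, latest loss {losses[-1]} at step {steps[-1]}")

# epoch = global_step / steps_per_epoch, so the header recovers the
# (approximate) epoch length: 76000 / 0.11257991692787182 is about 675,000
# optimizer steps per epoch.
print(f"~{state['global_step'] / state['epoch']:,.0f} steps per epoch")

Over the 980 logged steps shown, the learning rate falls by about 2.4e-7 in total, roughly 2.5e-10 per step, consistent with a very gradual decay that is near-linear at this point in the schedule, while the loss drifts from roughly 16.45 down to 16.34.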