Training in progress, step 6500, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 328277848
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:539ec21ed2f2d5401b90d0d0b28a43621343b47ec158a5dc912ef7d73a069cdf
|
| 3 |
size 328277848
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 318646859
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c631e1446372f309276121049f5c8b7603bed555765afc41b0a5db7f194949eb
|
| 3 |
size 318646859
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:59ab6babcc58d5a8a0338e2999283607960e6faa29d71e8d0c3f11e2480b272d
|
| 3 |
size 14645
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ad5a3c7ee6384cdea60f7a41957135fc1d6a8e0bdd3b9a0dd5c4c46f69d638ec
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 1.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -4311,6 +4311,364 @@
|
|
| 4311 |
"eval_samples_per_second": 208.452,
|
| 4312 |
"eval_steps_per_second": 4.377,
|
| 4313 |
"step": 6000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4314 |
}
|
| 4315 |
],
|
| 4316 |
"logging_steps": 10,
|
|
@@ -4330,7 +4688,7 @@
|
|
| 4330 |
"attributes": {}
|
| 4331 |
}
|
| 4332 |
},
|
| 4333 |
-
"total_flos": 2.
|
| 4334 |
"train_batch_size": 48,
|
| 4335 |
"trial_name": null,
|
| 4336 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 1.0981584727149856,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 6500,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 4311 |
"eval_samples_per_second": 208.452,
|
| 4312 |
"eval_steps_per_second": 4.377,
|
| 4313 |
"step": 6000
|
| 4314 |
+
},
|
| 4315 |
+
{
|
| 4316 |
+
"epoch": 1.0153742186180097,
|
| 4317 |
+
"grad_norm": 0.5339483022689819,
|
| 4318 |
+
"learning_rate": 0.00019297799453520028,
|
| 4319 |
+
"loss": 4.500830459594726,
|
| 4320 |
+
"step": 6010
|
| 4321 |
+
},
|
| 4322 |
+
{
|
| 4323 |
+
"epoch": 1.0170636931914174,
|
| 4324 |
+
"grad_norm": 0.5642256736755371,
|
| 4325 |
+
"learning_rate": 0.00019251885987680252,
|
| 4326 |
+
"loss": 4.485746002197265,
|
| 4327 |
+
"step": 6020
|
| 4328 |
+
},
|
| 4329 |
+
{
|
| 4330 |
+
"epoch": 1.0187531677648252,
|
| 4331 |
+
"grad_norm": 0.5060975551605225,
|
| 4332 |
+
"learning_rate": 0.00019205929164029217,
|
| 4333 |
+
"loss": 4.475402450561523,
|
| 4334 |
+
"step": 6030
|
| 4335 |
+
},
|
| 4336 |
+
{
|
| 4337 |
+
"epoch": 1.020442642338233,
|
| 4338 |
+
"grad_norm": 0.49786120653152466,
|
| 4339 |
+
"learning_rate": 0.00019159929451203033,
|
| 4340 |
+
"loss": 4.486777114868164,
|
| 4341 |
+
"step": 6040
|
| 4342 |
+
},
|
| 4343 |
+
{
|
| 4344 |
+
"epoch": 1.0221321169116404,
|
| 4345 |
+
"grad_norm": 0.506598949432373,
|
| 4346 |
+
"learning_rate": 0.00019113887318275149,
|
| 4347 |
+
"loss": 4.489146041870117,
|
| 4348 |
+
"step": 6050
|
| 4349 |
+
},
|
| 4350 |
+
{
|
| 4351 |
+
"epoch": 1.0238215914850481,
|
| 4352 |
+
"grad_norm": 0.48270103335380554,
|
| 4353 |
+
"learning_rate": 0.00019067803234751603,
|
| 4354 |
+
"loss": 4.474691009521484,
|
| 4355 |
+
"step": 6060
|
| 4356 |
+
},
|
| 4357 |
+
{
|
| 4358 |
+
"epoch": 1.0255110660584559,
|
| 4359 |
+
"grad_norm": 0.48239970207214355,
|
| 4360 |
+
"learning_rate": 0.00019021677670566208,
|
| 4361 |
+
"loss": 4.4708606719970705,
|
| 4362 |
+
"step": 6070
|
| 4363 |
+
},
|
| 4364 |
+
{
|
| 4365 |
+
"epoch": 1.0272005406318634,
|
| 4366 |
+
"grad_norm": 0.4966093599796295,
|
| 4367 |
+
"learning_rate": 0.00018975511096075762,
|
| 4368 |
+
"loss": 4.505655670166016,
|
| 4369 |
+
"step": 6080
|
| 4370 |
+
},
|
| 4371 |
+
{
|
| 4372 |
+
"epoch": 1.0288900152052711,
|
| 4373 |
+
"grad_norm": 0.5429375767707825,
|
| 4374 |
+
"learning_rate": 0.00018929303982055272,
|
| 4375 |
+
"loss": 4.499347305297851,
|
| 4376 |
+
"step": 6090
|
| 4377 |
+
},
|
| 4378 |
+
{
|
| 4379 |
+
"epoch": 1.0305794897786789,
|
| 4380 |
+
"grad_norm": 0.4981507360935211,
|
| 4381 |
+
"learning_rate": 0.00018883056799693125,
|
| 4382 |
+
"loss": 4.461819839477539,
|
| 4383 |
+
"step": 6100
|
| 4384 |
+
},
|
| 4385 |
+
{
|
| 4386 |
+
"epoch": 1.0322689643520866,
|
| 4387 |
+
"grad_norm": 0.5121614336967468,
|
| 4388 |
+
"learning_rate": 0.00018836770020586315,
|
| 4389 |
+
"loss": 4.478689956665039,
|
| 4390 |
+
"step": 6110
|
| 4391 |
+
},
|
| 4392 |
+
{
|
| 4393 |
+
"epoch": 1.033958438925494,
|
| 4394 |
+
"grad_norm": 0.4835728406906128,
|
| 4395 |
+
"learning_rate": 0.00018790444116735595,
|
| 4396 |
+
"loss": 4.47772216796875,
|
| 4397 |
+
"step": 6120
|
| 4398 |
+
},
|
| 4399 |
+
{
|
| 4400 |
+
"epoch": 1.0356479134989018,
|
| 4401 |
+
"grad_norm": 0.4881206154823303,
|
| 4402 |
+
"learning_rate": 0.00018744079560540695,
|
| 4403 |
+
"loss": 4.479801177978516,
|
| 4404 |
+
"step": 6130
|
| 4405 |
+
},
|
| 4406 |
+
{
|
| 4407 |
+
"epoch": 1.0373373880723096,
|
| 4408 |
+
"grad_norm": 0.47434431314468384,
|
| 4409 |
+
"learning_rate": 0.000186976768247955,
|
| 4410 |
+
"loss": 4.480235290527344,
|
| 4411 |
+
"step": 6140
|
| 4412 |
+
},
|
| 4413 |
+
{
|
| 4414 |
+
"epoch": 1.039026862645717,
|
| 4415 |
+
"grad_norm": 0.48258504271507263,
|
| 4416 |
+
"learning_rate": 0.00018651236382683225,
|
| 4417 |
+
"loss": 4.469864273071289,
|
| 4418 |
+
"step": 6150
|
| 4419 |
+
},
|
| 4420 |
+
{
|
| 4421 |
+
"epoch": 1.0407163372191248,
|
| 4422 |
+
"grad_norm": 0.5025637745857239,
|
| 4423 |
+
"learning_rate": 0.0001860475870777157,
|
| 4424 |
+
"loss": 4.472750091552735,
|
| 4425 |
+
"step": 6160
|
| 4426 |
+
},
|
| 4427 |
+
{
|
| 4428 |
+
"epoch": 1.0424058117925326,
|
| 4429 |
+
"grad_norm": 0.4636594355106354,
|
| 4430 |
+
"learning_rate": 0.0001855824427400793,
|
| 4431 |
+
"loss": 4.450835418701172,
|
| 4432 |
+
"step": 6170
|
| 4433 |
+
},
|
| 4434 |
+
{
|
| 4435 |
+
"epoch": 1.0440952863659403,
|
| 4436 |
+
"grad_norm": 0.4901501536369324,
|
| 4437 |
+
"learning_rate": 0.00018511693555714535,
|
| 4438 |
+
"loss": 4.490735626220703,
|
| 4439 |
+
"step": 6180
|
| 4440 |
+
},
|
| 4441 |
+
{
|
| 4442 |
+
"epoch": 1.0457847609393478,
|
| 4443 |
+
"grad_norm": 0.5198561549186707,
|
| 4444 |
+
"learning_rate": 0.00018465107027583615,
|
| 4445 |
+
"loss": 4.474180221557617,
|
| 4446 |
+
"step": 6190
|
| 4447 |
+
},
|
| 4448 |
+
{
|
| 4449 |
+
"epoch": 1.0474742355127555,
|
| 4450 |
+
"grad_norm": 0.4723539352416992,
|
| 4451 |
+
"learning_rate": 0.00018418485164672574,
|
| 4452 |
+
"loss": 4.4745361328125,
|
| 4453 |
+
"step": 6200
|
| 4454 |
+
},
|
| 4455 |
+
{
|
| 4456 |
+
"epoch": 1.0491637100861633,
|
| 4457 |
+
"grad_norm": 0.5074954628944397,
|
| 4458 |
+
"learning_rate": 0.00018371828442399128,
|
| 4459 |
+
"loss": 4.469810485839844,
|
| 4460 |
+
"step": 6210
|
| 4461 |
+
},
|
| 4462 |
+
{
|
| 4463 |
+
"epoch": 1.0508531846595708,
|
| 4464 |
+
"grad_norm": 0.49918699264526367,
|
| 4465 |
+
"learning_rate": 0.00018325137336536464,
|
| 4466 |
+
"loss": 4.442096710205078,
|
| 4467 |
+
"step": 6220
|
| 4468 |
+
},
|
| 4469 |
+
{
|
| 4470 |
+
"epoch": 1.0525426592329785,
|
| 4471 |
+
"grad_norm": 0.5088530778884888,
|
| 4472 |
+
"learning_rate": 0.00018278412323208392,
|
| 4473 |
+
"loss": 4.484762573242188,
|
| 4474 |
+
"step": 6230
|
| 4475 |
+
},
|
| 4476 |
+
{
|
| 4477 |
+
"epoch": 1.0542321338063863,
|
| 4478 |
+
"grad_norm": 0.506341814994812,
|
| 4479 |
+
"learning_rate": 0.00018231653878884486,
|
| 4480 |
+
"loss": 4.486656188964844,
|
| 4481 |
+
"step": 6240
|
| 4482 |
+
},
|
| 4483 |
+
{
|
| 4484 |
+
"epoch": 1.055921608379794,
|
| 4485 |
+
"grad_norm": 0.5262649059295654,
|
| 4486 |
+
"learning_rate": 0.00018184862480375233,
|
| 4487 |
+
"loss": 4.455668640136719,
|
| 4488 |
+
"step": 6250
|
| 4489 |
+
},
|
| 4490 |
+
{
|
| 4491 |
+
"epoch": 1.0576110829532015,
|
| 4492 |
+
"grad_norm": 0.5115051865577698,
|
| 4493 |
+
"learning_rate": 0.00018138038604827153,
|
| 4494 |
+
"loss": 4.479043960571289,
|
| 4495 |
+
"step": 6260
|
| 4496 |
+
},
|
| 4497 |
+
{
|
| 4498 |
+
"epoch": 1.0593005575266092,
|
| 4499 |
+
"grad_norm": 0.50110924243927,
|
| 4500 |
+
"learning_rate": 0.0001809118272971795,
|
| 4501 |
+
"loss": 4.446685409545898,
|
| 4502 |
+
"step": 6270
|
| 4503 |
+
},
|
| 4504 |
+
{
|
| 4505 |
+
"epoch": 1.060990032100017,
|
| 4506 |
+
"grad_norm": 0.5022484660148621,
|
| 4507 |
+
"learning_rate": 0.0001804429533285164,
|
| 4508 |
+
"loss": 4.4593353271484375,
|
| 4509 |
+
"step": 6280
|
| 4510 |
+
},
|
| 4511 |
+
{
|
| 4512 |
+
"epoch": 1.0626795066734245,
|
| 4513 |
+
"grad_norm": 0.492165744304657,
|
| 4514 |
+
"learning_rate": 0.00017997376892353668,
|
| 4515 |
+
"loss": 4.496971511840821,
|
| 4516 |
+
"step": 6290
|
| 4517 |
+
},
|
| 4518 |
+
{
|
| 4519 |
+
"epoch": 1.0643689812468322,
|
| 4520 |
+
"grad_norm": 0.5134599208831787,
|
| 4521 |
+
"learning_rate": 0.0001795042788666605,
|
| 4522 |
+
"loss": 4.465629196166992,
|
| 4523 |
+
"step": 6300
|
| 4524 |
+
},
|
| 4525 |
+
{
|
| 4526 |
+
"epoch": 1.06605845582024,
|
| 4527 |
+
"grad_norm": 0.5151488184928894,
|
| 4528 |
+
"learning_rate": 0.00017903448794542488,
|
| 4529 |
+
"loss": 4.454899597167969,
|
| 4530 |
+
"step": 6310
|
| 4531 |
+
},
|
| 4532 |
+
{
|
| 4533 |
+
"epoch": 1.0677479303936477,
|
| 4534 |
+
"grad_norm": 0.5240500569343567,
|
| 4535 |
+
"learning_rate": 0.00017856440095043464,
|
| 4536 |
+
"loss": 4.481625747680664,
|
| 4537 |
+
"step": 6320
|
| 4538 |
+
},
|
| 4539 |
+
{
|
| 4540 |
+
"epoch": 1.0694374049670552,
|
| 4541 |
+
"grad_norm": 0.5187123417854309,
|
| 4542 |
+
"learning_rate": 0.00017809402267531405,
|
| 4543 |
+
"loss": 4.437789535522461,
|
| 4544 |
+
"step": 6330
|
| 4545 |
+
},
|
| 4546 |
+
{
|
| 4547 |
+
"epoch": 1.071126879540463,
|
| 4548 |
+
"grad_norm": 0.4693409502506256,
|
| 4549 |
+
"learning_rate": 0.00017762335791665735,
|
| 4550 |
+
"loss": 4.450423812866211,
|
| 4551 |
+
"step": 6340
|
| 4552 |
+
},
|
| 4553 |
+
{
|
| 4554 |
+
"epoch": 1.0728163541138707,
|
| 4555 |
+
"grad_norm": 0.5061246752738953,
|
| 4556 |
+
"learning_rate": 0.00017715241147398035,
|
| 4557 |
+
"loss": 4.46313705444336,
|
| 4558 |
+
"step": 6350
|
| 4559 |
+
},
|
| 4560 |
+
{
|
| 4561 |
+
"epoch": 1.0745058286872782,
|
| 4562 |
+
"grad_norm": 0.47927796840667725,
|
| 4563 |
+
"learning_rate": 0.00017668118814967126,
|
| 4564 |
+
"loss": 4.446915817260742,
|
| 4565 |
+
"step": 6360
|
| 4566 |
+
},
|
| 4567 |
+
{
|
| 4568 |
+
"epoch": 1.076195303260686,
|
| 4569 |
+
"grad_norm": 0.47587907314300537,
|
| 4570 |
+
"learning_rate": 0.00017620969274894163,
|
| 4571 |
+
"loss": 4.461398696899414,
|
| 4572 |
+
"step": 6370
|
| 4573 |
+
},
|
| 4574 |
+
{
|
| 4575 |
+
"epoch": 1.0778847778340936,
|
| 4576 |
+
"grad_norm": 0.5091392397880554,
|
| 4577 |
+
"learning_rate": 0.00017573793007977763,
|
| 4578 |
+
"loss": 4.450970458984375,
|
| 4579 |
+
"step": 6380
|
| 4580 |
+
},
|
| 4581 |
+
{
|
| 4582 |
+
"epoch": 1.0795742524075012,
|
| 4583 |
+
"grad_norm": 0.5105127692222595,
|
| 4584 |
+
"learning_rate": 0.0001752659049528906,
|
| 4585 |
+
"loss": 4.458657455444336,
|
| 4586 |
+
"step": 6390
|
| 4587 |
+
},
|
| 4588 |
+
{
|
| 4589 |
+
"epoch": 1.081263726980909,
|
| 4590 |
+
"grad_norm": 0.5196726322174072,
|
| 4591 |
+
"learning_rate": 0.00017479362218166854,
|
| 4592 |
+
"loss": 4.444008636474609,
|
| 4593 |
+
"step": 6400
|
| 4594 |
+
},
|
| 4595 |
+
{
|
| 4596 |
+
"epoch": 1.0829532015543166,
|
| 4597 |
+
"grad_norm": 0.4891359210014343,
|
| 4598 |
+
"learning_rate": 0.0001743210865821265,
|
| 4599 |
+
"loss": 4.436445236206055,
|
| 4600 |
+
"step": 6410
|
| 4601 |
+
},
|
| 4602 |
+
{
|
| 4603 |
+
"epoch": 1.0846426761277244,
|
| 4604 |
+
"grad_norm": 0.5141095519065857,
|
| 4605 |
+
"learning_rate": 0.0001738483029728578,
|
| 4606 |
+
"loss": 4.455481338500976,
|
| 4607 |
+
"step": 6420
|
| 4608 |
+
},
|
| 4609 |
+
{
|
| 4610 |
+
"epoch": 1.0863321507011319,
|
| 4611 |
+
"grad_norm": 0.5223525166511536,
|
| 4612 |
+
"learning_rate": 0.00017337527617498474,
|
| 4613 |
+
"loss": 4.485405731201172,
|
| 4614 |
+
"step": 6430
|
| 4615 |
+
},
|
| 4616 |
+
{
|
| 4617 |
+
"epoch": 1.0880216252745396,
|
| 4618 |
+
"grad_norm": 0.4939091205596924,
|
| 4619 |
+
"learning_rate": 0.0001729020110121096,
|
| 4620 |
+
"loss": 4.448784255981446,
|
| 4621 |
+
"step": 6440
|
| 4622 |
+
},
|
| 4623 |
+
{
|
| 4624 |
+
"epoch": 1.0897110998479473,
|
| 4625 |
+
"grad_norm": 0.49695253372192383,
|
| 4626 |
+
"learning_rate": 0.0001724285123102652,
|
| 4627 |
+
"loss": 4.4587146759033205,
|
| 4628 |
+
"step": 6450
|
| 4629 |
+
},
|
| 4630 |
+
{
|
| 4631 |
+
"epoch": 1.091400574421355,
|
| 4632 |
+
"grad_norm": 0.4882517158985138,
|
| 4633 |
+
"learning_rate": 0.00017195478489786593,
|
| 4634 |
+
"loss": 4.43580207824707,
|
| 4635 |
+
"step": 6460
|
| 4636 |
+
},
|
| 4637 |
+
{
|
| 4638 |
+
"epoch": 1.0930900489947626,
|
| 4639 |
+
"grad_norm": 0.4971882998943329,
|
| 4640 |
+
"learning_rate": 0.00017148083360565836,
|
| 4641 |
+
"loss": 4.436479949951172,
|
| 4642 |
+
"step": 6470
|
| 4643 |
+
},
|
| 4644 |
+
{
|
| 4645 |
+
"epoch": 1.0947795235681703,
|
| 4646 |
+
"grad_norm": 0.4835260808467865,
|
| 4647 |
+
"learning_rate": 0.00017100666326667202,
|
| 4648 |
+
"loss": 4.476963043212891,
|
| 4649 |
+
"step": 6480
|
| 4650 |
+
},
|
| 4651 |
+
{
|
| 4652 |
+
"epoch": 1.096468998141578,
|
| 4653 |
+
"grad_norm": 0.4847490191459656,
|
| 4654 |
+
"learning_rate": 0.00017053227871617027,
|
| 4655 |
+
"loss": 4.449015426635742,
|
| 4656 |
+
"step": 6490
|
| 4657 |
+
},
|
| 4658 |
+
{
|
| 4659 |
+
"epoch": 1.0981584727149856,
|
| 4660 |
+
"grad_norm": 0.5305824279785156,
|
| 4661 |
+
"learning_rate": 0.00017005768479160064,
|
| 4662 |
+
"loss": 4.452330780029297,
|
| 4663 |
+
"step": 6500
|
| 4664 |
+
},
|
| 4665 |
+
{
|
| 4666 |
+
"epoch": 1.0981584727149856,
|
| 4667 |
+
"eval_loss": 4.447469711303711,
|
| 4668 |
+
"eval_runtime": 4.0239,
|
| 4669 |
+
"eval_samples_per_second": 248.518,
|
| 4670 |
+
"eval_steps_per_second": 5.219,
|
| 4671 |
+
"step": 6500
|
| 4672 |
}
|
| 4673 |
],
|
| 4674 |
"logging_steps": 10,
|
|
|
|
| 4688 |
"attributes": {}
|
| 4689 |
}
|
| 4690 |
},
|
| 4691 |
+
"total_flos": 2.1739484320314163e+17,
|
| 4692 |
"train_batch_size": 48,
|
| 4693 |
"trial_name": null,
|
| 4694 |
"trial_params": null
|