Training in progress, step 6500, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 328277848
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ccb4549408e52f631e5a2754236ea70999d0d21bd6cdb0e3578808e3ad0ec0af
|
| 3 |
size 328277848
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 318646859
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d5fa1630ee9673533bbca0fabe5cc81512e307a45647863ef671c972b6a648c2
|
| 3 |
size 318646859
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b04f884e61b89876d8b9b16b9a44bf2c7f027c2c95e35ca0aba5b86933c2288c
|
| 3 |
size 14645
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ad5a3c7ee6384cdea60f7a41957135fc1d6a8e0bdd3b9a0dd5c4c46f69d638ec
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 1.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -4311,6 +4311,364 @@
|
|
| 4311 |
"eval_samples_per_second": 276.003,
|
| 4312 |
"eval_steps_per_second": 5.796,
|
| 4313 |
"step": 6000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4314 |
}
|
| 4315 |
],
|
| 4316 |
"logging_steps": 10,
|
|
@@ -4330,7 +4688,7 @@
|
|
| 4330 |
"attributes": {}
|
| 4331 |
}
|
| 4332 |
},
|
| 4333 |
-
"total_flos": 2.
|
| 4334 |
"train_batch_size": 48,
|
| 4335 |
"trial_name": null,
|
| 4336 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 1.0981584727149856,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 6500,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 4311 |
"eval_samples_per_second": 276.003,
|
| 4312 |
"eval_steps_per_second": 5.796,
|
| 4313 |
"step": 6000
|
| 4314 |
+
},
|
| 4315 |
+
{
|
| 4316 |
+
"epoch": 1.0153742186180097,
|
| 4317 |
+
"grad_norm": 0.518569827079773,
|
| 4318 |
+
"learning_rate": 0.00019297799453520028,
|
| 4319 |
+
"loss": 4.499275207519531,
|
| 4320 |
+
"step": 6010
|
| 4321 |
+
},
|
| 4322 |
+
{
|
| 4323 |
+
"epoch": 1.0170636931914174,
|
| 4324 |
+
"grad_norm": 0.5655678510665894,
|
| 4325 |
+
"learning_rate": 0.00019251885987680252,
|
| 4326 |
+
"loss": 4.483604049682617,
|
| 4327 |
+
"step": 6020
|
| 4328 |
+
},
|
| 4329 |
+
{
|
| 4330 |
+
"epoch": 1.0187531677648252,
|
| 4331 |
+
"grad_norm": 0.5033740997314453,
|
| 4332 |
+
"learning_rate": 0.00019205929164029217,
|
| 4333 |
+
"loss": 4.474781036376953,
|
| 4334 |
+
"step": 6030
|
| 4335 |
+
},
|
| 4336 |
+
{
|
| 4337 |
+
"epoch": 1.020442642338233,
|
| 4338 |
+
"grad_norm": 0.5125960111618042,
|
| 4339 |
+
"learning_rate": 0.00019159929451203033,
|
| 4340 |
+
"loss": 4.485195922851562,
|
| 4341 |
+
"step": 6040
|
| 4342 |
+
},
|
| 4343 |
+
{
|
| 4344 |
+
"epoch": 1.0221321169116404,
|
| 4345 |
+
"grad_norm": 0.5002242922782898,
|
| 4346 |
+
"learning_rate": 0.00019113887318275149,
|
| 4347 |
+
"loss": 4.486893081665039,
|
| 4348 |
+
"step": 6050
|
| 4349 |
+
},
|
| 4350 |
+
{
|
| 4351 |
+
"epoch": 1.0238215914850481,
|
| 4352 |
+
"grad_norm": 0.48877793550491333,
|
| 4353 |
+
"learning_rate": 0.00019067803234751603,
|
| 4354 |
+
"loss": 4.473563766479492,
|
| 4355 |
+
"step": 6060
|
| 4356 |
+
},
|
| 4357 |
+
{
|
| 4358 |
+
"epoch": 1.0255110660584559,
|
| 4359 |
+
"grad_norm": 0.485661119222641,
|
| 4360 |
+
"learning_rate": 0.00019021677670566208,
|
| 4361 |
+
"loss": 4.469658660888672,
|
| 4362 |
+
"step": 6070
|
| 4363 |
+
},
|
| 4364 |
+
{
|
| 4365 |
+
"epoch": 1.0272005406318634,
|
| 4366 |
+
"grad_norm": 0.5000821352005005,
|
| 4367 |
+
"learning_rate": 0.00018975511096075762,
|
| 4368 |
+
"loss": 4.504412078857422,
|
| 4369 |
+
"step": 6080
|
| 4370 |
+
},
|
| 4371 |
+
{
|
| 4372 |
+
"epoch": 1.0288900152052711,
|
| 4373 |
+
"grad_norm": 0.5075719356536865,
|
| 4374 |
+
"learning_rate": 0.00018929303982055272,
|
| 4375 |
+
"loss": 4.497782135009766,
|
| 4376 |
+
"step": 6090
|
| 4377 |
+
},
|
| 4378 |
+
{
|
| 4379 |
+
"epoch": 1.0305794897786789,
|
| 4380 |
+
"grad_norm": 0.477532297372818,
|
| 4381 |
+
"learning_rate": 0.00018883056799693125,
|
| 4382 |
+
"loss": 4.46100082397461,
|
| 4383 |
+
"step": 6100
|
| 4384 |
+
},
|
| 4385 |
+
{
|
| 4386 |
+
"epoch": 1.0322689643520866,
|
| 4387 |
+
"grad_norm": 0.5213661789894104,
|
| 4388 |
+
"learning_rate": 0.00018836770020586315,
|
| 4389 |
+
"loss": 4.476996612548828,
|
| 4390 |
+
"step": 6110
|
| 4391 |
+
},
|
| 4392 |
+
{
|
| 4393 |
+
"epoch": 1.033958438925494,
|
| 4394 |
+
"grad_norm": 0.5093067288398743,
|
| 4395 |
+
"learning_rate": 0.00018790444116735595,
|
| 4396 |
+
"loss": 4.477323150634765,
|
| 4397 |
+
"step": 6120
|
| 4398 |
+
},
|
| 4399 |
+
{
|
| 4400 |
+
"epoch": 1.0356479134989018,
|
| 4401 |
+
"grad_norm": 0.480839341878891,
|
| 4402 |
+
"learning_rate": 0.00018744079560540695,
|
| 4403 |
+
"loss": 4.478923797607422,
|
| 4404 |
+
"step": 6130
|
| 4405 |
+
},
|
| 4406 |
+
{
|
| 4407 |
+
"epoch": 1.0373373880723096,
|
| 4408 |
+
"grad_norm": 0.47398701310157776,
|
| 4409 |
+
"learning_rate": 0.000186976768247955,
|
| 4410 |
+
"loss": 4.478921508789062,
|
| 4411 |
+
"step": 6140
|
| 4412 |
+
},
|
| 4413 |
+
{
|
| 4414 |
+
"epoch": 1.039026862645717,
|
| 4415 |
+
"grad_norm": 0.4890805780887604,
|
| 4416 |
+
"learning_rate": 0.00018651236382683225,
|
| 4417 |
+
"loss": 4.468624877929687,
|
| 4418 |
+
"step": 6150
|
| 4419 |
+
},
|
| 4420 |
+
{
|
| 4421 |
+
"epoch": 1.0407163372191248,
|
| 4422 |
+
"grad_norm": 0.49367958307266235,
|
| 4423 |
+
"learning_rate": 0.0001860475870777157,
|
| 4424 |
+
"loss": 4.472190475463867,
|
| 4425 |
+
"step": 6160
|
| 4426 |
+
},
|
| 4427 |
+
{
|
| 4428 |
+
"epoch": 1.0424058117925326,
|
| 4429 |
+
"grad_norm": 0.4590769112110138,
|
| 4430 |
+
"learning_rate": 0.0001855824427400793,
|
| 4431 |
+
"loss": 4.449500274658203,
|
| 4432 |
+
"step": 6170
|
| 4433 |
+
},
|
| 4434 |
+
{
|
| 4435 |
+
"epoch": 1.0440952863659403,
|
| 4436 |
+
"grad_norm": 0.4810253381729126,
|
| 4437 |
+
"learning_rate": 0.00018511693555714535,
|
| 4438 |
+
"loss": 4.490542221069336,
|
| 4439 |
+
"step": 6180
|
| 4440 |
+
},
|
| 4441 |
+
{
|
| 4442 |
+
"epoch": 1.0457847609393478,
|
| 4443 |
+
"grad_norm": 0.5299515128135681,
|
| 4444 |
+
"learning_rate": 0.00018465107027583615,
|
| 4445 |
+
"loss": 4.474026489257812,
|
| 4446 |
+
"step": 6190
|
| 4447 |
+
},
|
| 4448 |
+
{
|
| 4449 |
+
"epoch": 1.0474742355127555,
|
| 4450 |
+
"grad_norm": 0.4833298623561859,
|
| 4451 |
+
"learning_rate": 0.00018418485164672574,
|
| 4452 |
+
"loss": 4.473223114013672,
|
| 4453 |
+
"step": 6200
|
| 4454 |
+
},
|
| 4455 |
+
{
|
| 4456 |
+
"epoch": 1.0491637100861633,
|
| 4457 |
+
"grad_norm": 0.4987802803516388,
|
| 4458 |
+
"learning_rate": 0.00018371828442399128,
|
| 4459 |
+
"loss": 4.467764663696289,
|
| 4460 |
+
"step": 6210
|
| 4461 |
+
},
|
| 4462 |
+
{
|
| 4463 |
+
"epoch": 1.0508531846595708,
|
| 4464 |
+
"grad_norm": 0.49086934328079224,
|
| 4465 |
+
"learning_rate": 0.00018325137336536464,
|
| 4466 |
+
"loss": 4.441515350341797,
|
| 4467 |
+
"step": 6220
|
| 4468 |
+
},
|
| 4469 |
+
{
|
| 4470 |
+
"epoch": 1.0525426592329785,
|
| 4471 |
+
"grad_norm": 0.5031701326370239,
|
| 4472 |
+
"learning_rate": 0.00018278412323208392,
|
| 4473 |
+
"loss": 4.483510208129883,
|
| 4474 |
+
"step": 6230
|
| 4475 |
+
},
|
| 4476 |
+
{
|
| 4477 |
+
"epoch": 1.0542321338063863,
|
| 4478 |
+
"grad_norm": 0.509184718132019,
|
| 4479 |
+
"learning_rate": 0.00018231653878884486,
|
| 4480 |
+
"loss": 4.485199356079102,
|
| 4481 |
+
"step": 6240
|
| 4482 |
+
},
|
| 4483 |
+
{
|
| 4484 |
+
"epoch": 1.055921608379794,
|
| 4485 |
+
"grad_norm": 0.48335397243499756,
|
| 4486 |
+
"learning_rate": 0.00018184862480375233,
|
| 4487 |
+
"loss": 4.454570388793945,
|
| 4488 |
+
"step": 6250
|
| 4489 |
+
},
|
| 4490 |
+
{
|
| 4491 |
+
"epoch": 1.0576110829532015,
|
| 4492 |
+
"grad_norm": 0.5146468281745911,
|
| 4493 |
+
"learning_rate": 0.00018138038604827153,
|
| 4494 |
+
"loss": 4.477815628051758,
|
| 4495 |
+
"step": 6260
|
| 4496 |
+
},
|
| 4497 |
+
{
|
| 4498 |
+
"epoch": 1.0593005575266092,
|
| 4499 |
+
"grad_norm": 0.5049527883529663,
|
| 4500 |
+
"learning_rate": 0.0001809118272971795,
|
| 4501 |
+
"loss": 4.445434951782227,
|
| 4502 |
+
"step": 6270
|
| 4503 |
+
},
|
| 4504 |
+
{
|
| 4505 |
+
"epoch": 1.060990032100017,
|
| 4506 |
+
"grad_norm": 0.47304192185401917,
|
| 4507 |
+
"learning_rate": 0.0001804429533285164,
|
| 4508 |
+
"loss": 4.458169555664062,
|
| 4509 |
+
"step": 6280
|
| 4510 |
+
},
|
| 4511 |
+
{
|
| 4512 |
+
"epoch": 1.0626795066734245,
|
| 4513 |
+
"grad_norm": 0.4755364954471588,
|
| 4514 |
+
"learning_rate": 0.00017997376892353668,
|
| 4515 |
+
"loss": 4.495440292358398,
|
| 4516 |
+
"step": 6290
|
| 4517 |
+
},
|
| 4518 |
+
{
|
| 4519 |
+
"epoch": 1.0643689812468322,
|
| 4520 |
+
"grad_norm": 0.49506038427352905,
|
| 4521 |
+
"learning_rate": 0.0001795042788666605,
|
| 4522 |
+
"loss": 4.4639404296875,
|
| 4523 |
+
"step": 6300
|
| 4524 |
+
},
|
| 4525 |
+
{
|
| 4526 |
+
"epoch": 1.06605845582024,
|
| 4527 |
+
"grad_norm": 0.5216291546821594,
|
| 4528 |
+
"learning_rate": 0.00017903448794542488,
|
| 4529 |
+
"loss": 4.4542278289794925,
|
| 4530 |
+
"step": 6310
|
| 4531 |
+
},
|
| 4532 |
+
{
|
| 4533 |
+
"epoch": 1.0677479303936477,
|
| 4534 |
+
"grad_norm": 0.5284595489501953,
|
| 4535 |
+
"learning_rate": 0.00017856440095043464,
|
| 4536 |
+
"loss": 4.479632186889648,
|
| 4537 |
+
"step": 6320
|
| 4538 |
+
},
|
| 4539 |
+
{
|
| 4540 |
+
"epoch": 1.0694374049670552,
|
| 4541 |
+
"grad_norm": 0.5182107090950012,
|
| 4542 |
+
"learning_rate": 0.00017809402267531405,
|
| 4543 |
+
"loss": 4.4362133026123045,
|
| 4544 |
+
"step": 6330
|
| 4545 |
+
},
|
| 4546 |
+
{
|
| 4547 |
+
"epoch": 1.071126879540463,
|
| 4548 |
+
"grad_norm": 0.5018042922019958,
|
| 4549 |
+
"learning_rate": 0.00017762335791665735,
|
| 4550 |
+
"loss": 4.452248001098633,
|
| 4551 |
+
"step": 6340
|
| 4552 |
+
},
|
| 4553 |
+
{
|
| 4554 |
+
"epoch": 1.0728163541138707,
|
| 4555 |
+
"grad_norm": 0.5280482172966003,
|
| 4556 |
+
"learning_rate": 0.00017715241147398035,
|
| 4557 |
+
"loss": 4.464836120605469,
|
| 4558 |
+
"step": 6350
|
| 4559 |
+
},
|
| 4560 |
+
{
|
| 4561 |
+
"epoch": 1.0745058286872782,
|
| 4562 |
+
"grad_norm": 0.47761428356170654,
|
| 4563 |
+
"learning_rate": 0.00017668118814967126,
|
| 4564 |
+
"loss": 4.447597503662109,
|
| 4565 |
+
"step": 6360
|
| 4566 |
+
},
|
| 4567 |
+
{
|
| 4568 |
+
"epoch": 1.076195303260686,
|
| 4569 |
+
"grad_norm": 0.4841929078102112,
|
| 4570 |
+
"learning_rate": 0.00017620969274894163,
|
| 4571 |
+
"loss": 4.4613292694091795,
|
| 4572 |
+
"step": 6370
|
| 4573 |
+
},
|
| 4574 |
+
{
|
| 4575 |
+
"epoch": 1.0778847778340936,
|
| 4576 |
+
"grad_norm": 0.5038534998893738,
|
| 4577 |
+
"learning_rate": 0.00017573793007977763,
|
| 4578 |
+
"loss": 4.451330184936523,
|
| 4579 |
+
"step": 6380
|
| 4580 |
+
},
|
| 4581 |
+
{
|
| 4582 |
+
"epoch": 1.0795742524075012,
|
| 4583 |
+
"grad_norm": 0.5004971027374268,
|
| 4584 |
+
"learning_rate": 0.0001752659049528906,
|
| 4585 |
+
"loss": 4.457633972167969,
|
| 4586 |
+
"step": 6390
|
| 4587 |
+
},
|
| 4588 |
+
{
|
| 4589 |
+
"epoch": 1.081263726980909,
|
| 4590 |
+
"grad_norm": 0.5123668909072876,
|
| 4591 |
+
"learning_rate": 0.00017479362218166854,
|
| 4592 |
+
"loss": 4.443200302124024,
|
| 4593 |
+
"step": 6400
|
| 4594 |
+
},
|
| 4595 |
+
{
|
| 4596 |
+
"epoch": 1.0829532015543166,
|
| 4597 |
+
"grad_norm": 0.5099160075187683,
|
| 4598 |
+
"learning_rate": 0.0001743210865821265,
|
| 4599 |
+
"loss": 4.436219787597656,
|
| 4600 |
+
"step": 6410
|
| 4601 |
+
},
|
| 4602 |
+
{
|
| 4603 |
+
"epoch": 1.0846426761277244,
|
| 4604 |
+
"grad_norm": 0.5162463784217834,
|
| 4605 |
+
"learning_rate": 0.0001738483029728578,
|
| 4606 |
+
"loss": 4.45533561706543,
|
| 4607 |
+
"step": 6420
|
| 4608 |
+
},
|
| 4609 |
+
{
|
| 4610 |
+
"epoch": 1.0863321507011319,
|
| 4611 |
+
"grad_norm": 0.5178755521774292,
|
| 4612 |
+
"learning_rate": 0.00017337527617498474,
|
| 4613 |
+
"loss": 4.48522720336914,
|
| 4614 |
+
"step": 6430
|
| 4615 |
+
},
|
| 4616 |
+
{
|
| 4617 |
+
"epoch": 1.0880216252745396,
|
| 4618 |
+
"grad_norm": 0.49394717812538147,
|
| 4619 |
+
"learning_rate": 0.0001729020110121096,
|
| 4620 |
+
"loss": 4.447189712524414,
|
| 4621 |
+
"step": 6440
|
| 4622 |
+
},
|
| 4623 |
+
{
|
| 4624 |
+
"epoch": 1.0897110998479473,
|
| 4625 |
+
"grad_norm": 0.4908885955810547,
|
| 4626 |
+
"learning_rate": 0.0001724285123102652,
|
| 4627 |
+
"loss": 4.457671737670898,
|
| 4628 |
+
"step": 6450
|
| 4629 |
+
},
|
| 4630 |
+
{
|
| 4631 |
+
"epoch": 1.091400574421355,
|
| 4632 |
+
"grad_norm": 0.5045267343521118,
|
| 4633 |
+
"learning_rate": 0.00017195478489786593,
|
| 4634 |
+
"loss": 4.435376358032227,
|
| 4635 |
+
"step": 6460
|
| 4636 |
+
},
|
| 4637 |
+
{
|
| 4638 |
+
"epoch": 1.0930900489947626,
|
| 4639 |
+
"grad_norm": 0.5065691471099854,
|
| 4640 |
+
"learning_rate": 0.00017148083360565836,
|
| 4641 |
+
"loss": 4.435953903198242,
|
| 4642 |
+
"step": 6470
|
| 4643 |
+
},
|
| 4644 |
+
{
|
| 4645 |
+
"epoch": 1.0947795235681703,
|
| 4646 |
+
"grad_norm": 0.4825722575187683,
|
| 4647 |
+
"learning_rate": 0.00017100666326667202,
|
| 4648 |
+
"loss": 4.4766490936279295,
|
| 4649 |
+
"step": 6480
|
| 4650 |
+
},
|
| 4651 |
+
{
|
| 4652 |
+
"epoch": 1.096468998141578,
|
| 4653 |
+
"grad_norm": 0.4787653982639313,
|
| 4654 |
+
"learning_rate": 0.00017053227871617027,
|
| 4655 |
+
"loss": 4.448079299926758,
|
| 4656 |
+
"step": 6490
|
| 4657 |
+
},
|
| 4658 |
+
{
|
| 4659 |
+
"epoch": 1.0981584727149856,
|
| 4660 |
+
"grad_norm": 0.5119611024856567,
|
| 4661 |
+
"learning_rate": 0.00017005768479160064,
|
| 4662 |
+
"loss": 4.452360534667969,
|
| 4663 |
+
"step": 6500
|
| 4664 |
+
},
|
| 4665 |
+
{
|
| 4666 |
+
"epoch": 1.0981584727149856,
|
| 4667 |
+
"eval_loss": 4.429732799530029,
|
| 4668 |
+
"eval_runtime": 3.6558,
|
| 4669 |
+
"eval_samples_per_second": 273.54,
|
| 4670 |
+
"eval_steps_per_second": 5.744,
|
| 4671 |
+
"step": 6500
|
| 4672 |
}
|
| 4673 |
],
|
| 4674 |
"logging_steps": 10,
|
|
|
|
| 4688 |
"attributes": {}
|
| 4689 |
}
|
| 4690 |
},
|
| 4691 |
+
"total_flos": 2.1739484320314163e+17,
|
| 4692 |
"train_batch_size": 48,
|
| 4693 |
"trial_name": null,
|
| 4694 |
"trial_params": null
|