Training in progress, epoch 0, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step3600/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step3600/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step3600/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step3600/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step3600/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +316 -4
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1037269336
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f12ce7779d96a024c4dd4f58d076b05867f31b520868639145f1b25c63bf1906
|
| 3 |
size 1037269336
|
last-checkpoint/global_step3600/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8b616c059747eaccce047c3112e296ed72258ba5d5394b0108a0212546122845
|
| 3 |
+
size 781993445
|
last-checkpoint/global_step3600/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a3a0874b23896609b2b966f82c616126aa647394a112baa83b98e5d2062c88c9
|
| 3 |
+
size 781993509
|
last-checkpoint/global_step3600/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:48fa9a4ed997374aade9c8737caed2aecf614bb3b3706b3d5f2e66eaf40351ff
|
| 3 |
+
size 781993509
|
last-checkpoint/global_step3600/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:67535a7ee65e672147ce20bec6495dce79103690259ea40e50c21fc33d4a4953
|
| 3 |
+
size 781993509
|
last-checkpoint/global_step3600/mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a25fb5ddf9c1af2d5420efdae26d4b9821cf64a076b842481731c6ebb07d1b3d
|
| 3 |
+
size 2610290277
|
last-checkpoint/latest
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
global_step3600
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4560423a884b4db453d7d1b748155a1cd58f131c7e355290b17af66a745e3b19
|
| 3 |
size 15429
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8985cb46ba2723280ee973b265fe66bd4b26b2de0dd0dbbe501d8869c79c0a4c
|
| 3 |
size 15429
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c5ad915f857347045218dcd0e5ba757cb7b726c9623b0b49d51280ce11e9c427
|
| 3 |
size 15429
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9a2a0ea073b71609cb1e29e7290795e9416839e4a2e0d6ed6b40688c05f20303
|
| 3 |
size 15429
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1401
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ab73268050baa090e15858a3a718e94ea470376fdd081db33931bc775fd1484a
|
| 3 |
size 1401
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
-
"best_metric": 1.
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 50,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -5312,6 +5312,318 @@
|
|
| 5312 |
"eval_samples_per_second": 174.212,
|
| 5313 |
"eval_steps_per_second": 10.925,
|
| 5314 |
"step": 3400
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5315 |
}
|
| 5316 |
],
|
| 5317 |
"logging_steps": 5,
|
|
@@ -5340,7 +5652,7 @@
|
|
| 5340 |
"attributes": {}
|
| 5341 |
}
|
| 5342 |
},
|
| 5343 |
-
"total_flos":
|
| 5344 |
"train_batch_size": 4,
|
| 5345 |
"trial_name": null,
|
| 5346 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
+
"best_metric": 1.9708884954452515,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.5233318796336677,
|
| 6 |
"eval_steps": 50,
|
| 7 |
+
"global_step": 3600,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 5312 |
"eval_samples_per_second": 174.212,
|
| 5313 |
"eval_steps_per_second": 10.925,
|
| 5314 |
"step": 3400
|
| 5315 |
+
},
|
| 5316 |
+
{
|
| 5317 |
+
"epoch": 0.4949847361535107,
|
| 5318 |
+
"grad_norm": 2.4753811359405518,
|
| 5319 |
+
"learning_rate": 7.509245947645659e-05,
|
| 5320 |
+
"loss": 1.9676,
|
| 5321 |
+
"step": 3405
|
| 5322 |
+
},
|
| 5323 |
+
{
|
| 5324 |
+
"epoch": 0.49571158598633525,
|
| 5325 |
+
"grad_norm": 2.552577495574951,
|
| 5326 |
+
"learning_rate": 7.502353796314939e-05,
|
| 5327 |
+
"loss": 2.0703,
|
| 5328 |
+
"step": 3410
|
| 5329 |
+
},
|
| 5330 |
+
{
|
| 5331 |
+
"epoch": 0.49643843581915975,
|
| 5332 |
+
"grad_norm": 2.2608258724212646,
|
| 5333 |
+
"learning_rate": 7.495455370075547e-05,
|
| 5334 |
+
"loss": 2.0442,
|
| 5335 |
+
"step": 3415
|
| 5336 |
+
},
|
| 5337 |
+
{
|
| 5338 |
+
"epoch": 0.4971652856519843,
|
| 5339 |
+
"grad_norm": 2.7708072662353516,
|
| 5340 |
+
"learning_rate": 7.488550686294263e-05,
|
| 5341 |
+
"loss": 2.0061,
|
| 5342 |
+
"step": 3420
|
| 5343 |
+
},
|
| 5344 |
+
{
|
| 5345 |
+
"epoch": 0.4978921354848088,
|
| 5346 |
+
"grad_norm": 2.265629768371582,
|
| 5347 |
+
"learning_rate": 7.481639762353621e-05,
|
| 5348 |
+
"loss": 2.0098,
|
| 5349 |
+
"step": 3425
|
| 5350 |
+
},
|
| 5351 |
+
{
|
| 5352 |
+
"epoch": 0.4986189853176334,
|
| 5353 |
+
"grad_norm": 2.200986623764038,
|
| 5354 |
+
"learning_rate": 7.474722615651865e-05,
|
| 5355 |
+
"loss": 2.0711,
|
| 5356 |
+
"step": 3430
|
| 5357 |
+
},
|
| 5358 |
+
{
|
| 5359 |
+
"epoch": 0.49934583515045794,
|
| 5360 |
+
"grad_norm": 2.6096930503845215,
|
| 5361 |
+
"learning_rate": 7.4677992636029e-05,
|
| 5362 |
+
"loss": 2.0267,
|
| 5363 |
+
"step": 3435
|
| 5364 |
+
},
|
| 5365 |
+
{
|
| 5366 |
+
"epoch": 0.5000726849832825,
|
| 5367 |
+
"grad_norm": 2.679610013961792,
|
| 5368 |
+
"learning_rate": 7.460869723636259e-05,
|
| 5369 |
+
"loss": 2.0392,
|
| 5370 |
+
"step": 3440
|
| 5371 |
+
},
|
| 5372 |
+
{
|
| 5373 |
+
"epoch": 0.500799534816107,
|
| 5374 |
+
"grad_norm": 2.4646713733673096,
|
| 5375 |
+
"learning_rate": 7.45393401319705e-05,
|
| 5376 |
+
"loss": 1.9999,
|
| 5377 |
+
"step": 3445
|
| 5378 |
+
},
|
| 5379 |
+
{
|
| 5380 |
+
"epoch": 0.5015263846489315,
|
| 5381 |
+
"grad_norm": 2.341169834136963,
|
| 5382 |
+
"learning_rate": 7.446992149745914e-05,
|
| 5383 |
+
"loss": 2.0061,
|
| 5384 |
+
"step": 3450
|
| 5385 |
+
},
|
| 5386 |
+
{
|
| 5387 |
+
"epoch": 0.5015263846489315,
|
| 5388 |
+
"eval_loss": 1.9797232151031494,
|
| 5389 |
+
"eval_runtime": 21.8402,
|
| 5390 |
+
"eval_samples_per_second": 151.144,
|
| 5391 |
+
"eval_steps_per_second": 9.478,
|
| 5392 |
+
"step": 3450
|
| 5393 |
+
},
|
| 5394 |
+
{
|
| 5395 |
+
"epoch": 0.5022532344817561,
|
| 5396 |
+
"grad_norm": 2.2362568378448486,
|
| 5397 |
+
"learning_rate": 7.440044150758987e-05,
|
| 5398 |
+
"loss": 1.8974,
|
| 5399 |
+
"step": 3455
|
| 5400 |
+
},
|
| 5401 |
+
{
|
| 5402 |
+
"epoch": 0.5029800843145806,
|
| 5403 |
+
"grad_norm": 2.497943878173828,
|
| 5404 |
+
"learning_rate": 7.433090033727847e-05,
|
| 5405 |
+
"loss": 2.178,
|
| 5406 |
+
"step": 3460
|
| 5407 |
+
},
|
| 5408 |
+
{
|
| 5409 |
+
"epoch": 0.5037069341474052,
|
| 5410 |
+
"grad_norm": 2.9573802947998047,
|
| 5411 |
+
"learning_rate": 7.426129816159475e-05,
|
| 5412 |
+
"loss": 2.0595,
|
| 5413 |
+
"step": 3465
|
| 5414 |
+
},
|
| 5415 |
+
{
|
| 5416 |
+
"epoch": 0.5044337839802296,
|
| 5417 |
+
"grad_norm": 2.28165602684021,
|
| 5418 |
+
"learning_rate": 7.419163515576209e-05,
|
| 5419 |
+
"loss": 2.2754,
|
| 5420 |
+
"step": 3470
|
| 5421 |
+
},
|
| 5422 |
+
{
|
| 5423 |
+
"epoch": 0.5051606338130542,
|
| 5424 |
+
"grad_norm": 2.49424147605896,
|
| 5425 |
+
"learning_rate": 7.412191149515707e-05,
|
| 5426 |
+
"loss": 2.1558,
|
| 5427 |
+
"step": 3475
|
| 5428 |
+
},
|
| 5429 |
+
{
|
| 5430 |
+
"epoch": 0.5058874836458788,
|
| 5431 |
+
"grad_norm": 3.0092170238494873,
|
| 5432 |
+
"learning_rate": 7.405212735530888e-05,
|
| 5433 |
+
"loss": 2.1079,
|
| 5434 |
+
"step": 3480
|
| 5435 |
+
},
|
| 5436 |
+
{
|
| 5437 |
+
"epoch": 0.5066143334787033,
|
| 5438 |
+
"grad_norm": 2.4972879886627197,
|
| 5439 |
+
"learning_rate": 7.398228291189901e-05,
|
| 5440 |
+
"loss": 2.181,
|
| 5441 |
+
"step": 3485
|
| 5442 |
+
},
|
| 5443 |
+
{
|
| 5444 |
+
"epoch": 0.5073411833115279,
|
| 5445 |
+
"grad_norm": 2.6351096630096436,
|
| 5446 |
+
"learning_rate": 7.391237834076077e-05,
|
| 5447 |
+
"loss": 1.9635,
|
| 5448 |
+
"step": 3490
|
| 5449 |
+
},
|
| 5450 |
+
{
|
| 5451 |
+
"epoch": 0.5080680331443523,
|
| 5452 |
+
"grad_norm": 2.5686097145080566,
|
| 5453 |
+
"learning_rate": 7.384241381787888e-05,
|
| 5454 |
+
"loss": 2.1353,
|
| 5455 |
+
"step": 3495
|
| 5456 |
+
},
|
| 5457 |
+
{
|
| 5458 |
+
"epoch": 0.5087948829771769,
|
| 5459 |
+
"grad_norm": 2.4493703842163086,
|
| 5460 |
+
"learning_rate": 7.377238951938886e-05,
|
| 5461 |
+
"loss": 2.1474,
|
| 5462 |
+
"step": 3500
|
| 5463 |
+
},
|
| 5464 |
+
{
|
| 5465 |
+
"epoch": 0.5087948829771769,
|
| 5466 |
+
"eval_loss": 1.9865467548370361,
|
| 5467 |
+
"eval_runtime": 19.1097,
|
| 5468 |
+
"eval_samples_per_second": 172.74,
|
| 5469 |
+
"eval_steps_per_second": 10.832,
|
| 5470 |
+
"step": 3500
|
| 5471 |
+
},
|
| 5472 |
+
{
|
| 5473 |
+
"epoch": 0.5095217328100015,
|
| 5474 |
+
"grad_norm": 2.4284589290618896,
|
| 5475 |
+
"learning_rate": 7.370230562157685e-05,
|
| 5476 |
+
"loss": 2.0678,
|
| 5477 |
+
"step": 3505
|
| 5478 |
+
},
|
| 5479 |
+
{
|
| 5480 |
+
"epoch": 0.510248582642826,
|
| 5481 |
+
"grad_norm": 2.635737657546997,
|
| 5482 |
+
"learning_rate": 7.363216230087898e-05,
|
| 5483 |
+
"loss": 2.2497,
|
| 5484 |
+
"step": 3510
|
| 5485 |
+
},
|
| 5486 |
+
{
|
| 5487 |
+
"epoch": 0.5109754324756506,
|
| 5488 |
+
"grad_norm": 2.3156023025512695,
|
| 5489 |
+
"learning_rate": 7.356195973388096e-05,
|
| 5490 |
+
"loss": 2.1084,
|
| 5491 |
+
"step": 3515
|
| 5492 |
+
},
|
| 5493 |
+
{
|
| 5494 |
+
"epoch": 0.511702282308475,
|
| 5495 |
+
"grad_norm": 2.362034559249878,
|
| 5496 |
+
"learning_rate": 7.349169809731767e-05,
|
| 5497 |
+
"loss": 1.9663,
|
| 5498 |
+
"step": 3520
|
| 5499 |
+
},
|
| 5500 |
+
{
|
| 5501 |
+
"epoch": 0.5124291321412996,
|
| 5502 |
+
"grad_norm": 2.198225975036621,
|
| 5503 |
+
"learning_rate": 7.342137756807273e-05,
|
| 5504 |
+
"loss": 1.9753,
|
| 5505 |
+
"step": 3525
|
| 5506 |
+
},
|
| 5507 |
+
{
|
| 5508 |
+
"epoch": 0.5131559819741242,
|
| 5509 |
+
"grad_norm": 2.3297581672668457,
|
| 5510 |
+
"learning_rate": 7.335099832317792e-05,
|
| 5511 |
+
"loss": 1.9516,
|
| 5512 |
+
"step": 3530
|
| 5513 |
+
},
|
| 5514 |
+
{
|
| 5515 |
+
"epoch": 0.5138828318069487,
|
| 5516 |
+
"grad_norm": 2.580559492111206,
|
| 5517 |
+
"learning_rate": 7.328056053981296e-05,
|
| 5518 |
+
"loss": 2.1125,
|
| 5519 |
+
"step": 3535
|
| 5520 |
+
},
|
| 5521 |
+
{
|
| 5522 |
+
"epoch": 0.5146096816397733,
|
| 5523 |
+
"grad_norm": 2.454136371612549,
|
| 5524 |
+
"learning_rate": 7.321006439530488e-05,
|
| 5525 |
+
"loss": 2.1955,
|
| 5526 |
+
"step": 3540
|
| 5527 |
+
},
|
| 5528 |
+
{
|
| 5529 |
+
"epoch": 0.5153365314725977,
|
| 5530 |
+
"grad_norm": 2.720200300216675,
|
| 5531 |
+
"learning_rate": 7.313951006712762e-05,
|
| 5532 |
+
"loss": 2.1802,
|
| 5533 |
+
"step": 3545
|
| 5534 |
+
},
|
| 5535 |
+
{
|
| 5536 |
+
"epoch": 0.5160633813054223,
|
| 5537 |
+
"grad_norm": 2.2702293395996094,
|
| 5538 |
+
"learning_rate": 7.306889773290163e-05,
|
| 5539 |
+
"loss": 2.0275,
|
| 5540 |
+
"step": 3550
|
| 5541 |
+
},
|
| 5542 |
+
{
|
| 5543 |
+
"epoch": 0.5160633813054223,
|
| 5544 |
+
"eval_loss": 1.9806544780731201,
|
| 5545 |
+
"eval_runtime": 19.2538,
|
| 5546 |
+
"eval_samples_per_second": 171.447,
|
| 5547 |
+
"eval_steps_per_second": 10.751,
|
| 5548 |
+
"step": 3550
|
| 5549 |
+
},
|
| 5550 |
+
{
|
| 5551 |
+
"epoch": 0.5167902311382468,
|
| 5552 |
+
"grad_norm": 2.6502344608306885,
|
| 5553 |
+
"learning_rate": 7.299822757039339e-05,
|
| 5554 |
+
"loss": 2.2931,
|
| 5555 |
+
"step": 3555
|
| 5556 |
+
},
|
| 5557 |
+
{
|
| 5558 |
+
"epoch": 0.5175170809710714,
|
| 5559 |
+
"grad_norm": 2.4326069355010986,
|
| 5560 |
+
"learning_rate": 7.292749975751491e-05,
|
| 5561 |
+
"loss": 2.0597,
|
| 5562 |
+
"step": 3560
|
| 5563 |
+
},
|
| 5564 |
+
{
|
| 5565 |
+
"epoch": 0.518243930803896,
|
| 5566 |
+
"grad_norm": 2.45497465133667,
|
| 5567 |
+
"learning_rate": 7.285671447232342e-05,
|
| 5568 |
+
"loss": 2.1446,
|
| 5569 |
+
"step": 3565
|
| 5570 |
+
},
|
| 5571 |
+
{
|
| 5572 |
+
"epoch": 0.5189707806367204,
|
| 5573 |
+
"grad_norm": 2.320857048034668,
|
| 5574 |
+
"learning_rate": 7.278587189302076e-05,
|
| 5575 |
+
"loss": 2.1279,
|
| 5576 |
+
"step": 3570
|
| 5577 |
+
},
|
| 5578 |
+
{
|
| 5579 |
+
"epoch": 0.519697630469545,
|
| 5580 |
+
"grad_norm": 2.6278252601623535,
|
| 5581 |
+
"learning_rate": 7.271497219795305e-05,
|
| 5582 |
+
"loss": 1.9936,
|
| 5583 |
+
"step": 3575
|
| 5584 |
+
},
|
| 5585 |
+
{
|
| 5586 |
+
"epoch": 0.5204244803023695,
|
| 5587 |
+
"grad_norm": 2.3981995582580566,
|
| 5588 |
+
"learning_rate": 7.264401556561019e-05,
|
| 5589 |
+
"loss": 1.9534,
|
| 5590 |
+
"step": 3580
|
| 5591 |
+
},
|
| 5592 |
+
{
|
| 5593 |
+
"epoch": 0.5211513301351941,
|
| 5594 |
+
"grad_norm": 2.486588716506958,
|
| 5595 |
+
"learning_rate": 7.257300217462541e-05,
|
| 5596 |
+
"loss": 2.291,
|
| 5597 |
+
"step": 3585
|
| 5598 |
+
},
|
| 5599 |
+
{
|
| 5600 |
+
"epoch": 0.5218781799680187,
|
| 5601 |
+
"grad_norm": 2.3635659217834473,
|
| 5602 |
+
"learning_rate": 7.250193220377486e-05,
|
| 5603 |
+
"loss": 1.9516,
|
| 5604 |
+
"step": 3590
|
| 5605 |
+
},
|
| 5606 |
+
{
|
| 5607 |
+
"epoch": 0.5226050298008431,
|
| 5608 |
+
"grad_norm": 2.548090934753418,
|
| 5609 |
+
"learning_rate": 7.243080583197707e-05,
|
| 5610 |
+
"loss": 2.0224,
|
| 5611 |
+
"step": 3595
|
| 5612 |
+
},
|
| 5613 |
+
{
|
| 5614 |
+
"epoch": 0.5233318796336677,
|
| 5615 |
+
"grad_norm": 2.575667142868042,
|
| 5616 |
+
"learning_rate": 7.235962323829262e-05,
|
| 5617 |
+
"loss": 2.3508,
|
| 5618 |
+
"step": 3600
|
| 5619 |
+
},
|
| 5620 |
+
{
|
| 5621 |
+
"epoch": 0.5233318796336677,
|
| 5622 |
+
"eval_loss": 1.9708884954452515,
|
| 5623 |
+
"eval_runtime": 18.8558,
|
| 5624 |
+
"eval_samples_per_second": 175.066,
|
| 5625 |
+
"eval_steps_per_second": 10.978,
|
| 5626 |
+
"step": 3600
|
| 5627 |
}
|
| 5628 |
],
|
| 5629 |
"logging_steps": 5,
|
|
|
|
| 5652 |
"attributes": {}
|
| 5653 |
}
|
| 5654 |
},
|
| 5655 |
+
"total_flos": 9.384889709651558e+17,
|
| 5656 |
"train_batch_size": 4,
|
| 5657 |
"trial_name": null,
|
| 5658 |
"trial_params": null
|