Training in progress, step 4650, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1482788592
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3c38b95ab4aac2f796998b2cfae140f7979cade13adf03e7235fd1703458464a
|
| 3 |
size 1482788592
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 2897966842
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8f2403d6ad34d7bc46cc37f112101c35525ca09ffdb487a561af0837bd32f6f7
|
| 3 |
size 2897966842
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:52314eeefbaa59acf058c36cd4c74c081929976886aaef2fb3ee73049c9effaf
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1256
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:931d38342b692c160fcb90f4d9acb8e6f6634f499984cdd5b99b0563194d400a
|
| 3 |
size 1256
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
-
"best_metric": 1.
|
| 3 |
-
"best_model_checkpoint": "./output/checkpoint-
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 150,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -3397,6 +3397,119 @@
|
|
| 3397 |
"eval_samples_per_second": 10.436,
|
| 3398 |
"eval_steps_per_second": 10.436,
|
| 3399 |
"step": 4500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3400 |
}
|
| 3401 |
],
|
| 3402 |
"logging_steps": 10,
|
|
@@ -3416,7 +3529,7 @@
|
|
| 3416 |
"attributes": {}
|
| 3417 |
}
|
| 3418 |
},
|
| 3419 |
-
"total_flos": 3.
|
| 3420 |
"train_batch_size": 4,
|
| 3421 |
"trial_name": null,
|
| 3422 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_metric": 1.2152043581008911,
|
| 3 |
+
"best_model_checkpoint": "./output/checkpoint-4650",
|
| 4 |
+
"epoch": 0.3073567321039064,
|
| 5 |
"eval_steps": 150,
|
| 6 |
+
"global_step": 4650,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 3397 |
"eval_samples_per_second": 10.436,
|
| 3398 |
"eval_steps_per_second": 10.436,
|
| 3399 |
"step": 4500
|
| 3400 |
+
},
|
| 3401 |
+
{
|
| 3402 |
+
"epoch": 0.2981029810298103,
|
| 3403 |
+
"grad_norm": 10.411969184875488,
|
| 3404 |
+
"learning_rate": 5.472047830984499e-07,
|
| 3405 |
+
"loss": 1.1499,
|
| 3406 |
+
"step": 4510
|
| 3407 |
+
},
|
| 3408 |
+
{
|
| 3409 |
+
"epoch": 0.2987639632493886,
|
| 3410 |
+
"grad_norm": 6.937885761260986,
|
| 3411 |
+
"learning_rate": 5.252725889984403e-07,
|
| 3412 |
+
"loss": 1.0297,
|
| 3413 |
+
"step": 4520
|
| 3414 |
+
},
|
| 3415 |
+
{
|
| 3416 |
+
"epoch": 0.29942494546896686,
|
| 3417 |
+
"grad_norm": 10.743237495422363,
|
| 3418 |
+
"learning_rate": 5.037783829820298e-07,
|
| 3419 |
+
"loss": 1.1198,
|
| 3420 |
+
"step": 4530
|
| 3421 |
+
},
|
| 3422 |
+
{
|
| 3423 |
+
"epoch": 0.3000859276885452,
|
| 3424 |
+
"grad_norm": 5.665622234344482,
|
| 3425 |
+
"learning_rate": 4.827230485918372e-07,
|
| 3426 |
+
"loss": 1.0459,
|
| 3427 |
+
"step": 4540
|
| 3428 |
+
},
|
| 3429 |
+
{
|
| 3430 |
+
"epoch": 0.30074690990812347,
|
| 3431 |
+
"grad_norm": 9.720799446105957,
|
| 3432 |
+
"learning_rate": 4.6210745133019236e-07,
|
| 3433 |
+
"loss": 1.1943,
|
| 3434 |
+
"step": 4550
|
| 3435 |
+
},
|
| 3436 |
+
{
|
| 3437 |
+
"epoch": 0.30140789212770175,
|
| 3438 |
+
"grad_norm": 11.57904052734375,
|
| 3439 |
+
"learning_rate": 4.419324386235529e-07,
|
| 3440 |
+
"loss": 1.2007,
|
| 3441 |
+
"step": 4560
|
| 3442 |
+
},
|
| 3443 |
+
{
|
| 3444 |
+
"epoch": 0.3020688743472801,
|
| 3445 |
+
"grad_norm": 10.47191333770752,
|
| 3446 |
+
"learning_rate": 4.2219883978767386e-07,
|
| 3447 |
+
"loss": 1.1754,
|
| 3448 |
+
"step": 4570
|
| 3449 |
+
},
|
| 3450 |
+
{
|
| 3451 |
+
"epoch": 0.30272985656685836,
|
| 3452 |
+
"grad_norm": 8.371639251708984,
|
| 3453 |
+
"learning_rate": 4.029074659935082e-07,
|
| 3454 |
+
"loss": 1.0829,
|
| 3455 |
+
"step": 4580
|
| 3456 |
+
},
|
| 3457 |
+
{
|
| 3458 |
+
"epoch": 0.30339083878643663,
|
| 3459 |
+
"grad_norm": 11.640840530395508,
|
| 3460 |
+
"learning_rate": 3.8405911023387444e-07,
|
| 3461 |
+
"loss": 1.0573,
|
| 3462 |
+
"step": 4590
|
| 3463 |
+
},
|
| 3464 |
+
{
|
| 3465 |
+
"epoch": 0.3040518210060149,
|
| 3466 |
+
"grad_norm": 14.082575798034668,
|
| 3467 |
+
"learning_rate": 3.6565454729085526e-07,
|
| 3468 |
+
"loss": 1.2711,
|
| 3469 |
+
"step": 4600
|
| 3470 |
+
},
|
| 3471 |
+
{
|
| 3472 |
+
"epoch": 0.30471280322559324,
|
| 3473 |
+
"grad_norm": 8.940695762634277,
|
| 3474 |
+
"learning_rate": 3.4769453370394753e-07,
|
| 3475 |
+
"loss": 1.1595,
|
| 3476 |
+
"step": 4610
|
| 3477 |
+
},
|
| 3478 |
+
{
|
| 3479 |
+
"epoch": 0.3053737854451715,
|
| 3480 |
+
"grad_norm": 7.7234954833984375,
|
| 3481 |
+
"learning_rate": 3.301798077389637e-07,
|
| 3482 |
+
"loss": 1.2151,
|
| 3483 |
+
"step": 4620
|
| 3484 |
+
},
|
| 3485 |
+
{
|
| 3486 |
+
"epoch": 0.3060347676647498,
|
| 3487 |
+
"grad_norm": 4.756081581115723,
|
| 3488 |
+
"learning_rate": 3.1311108935768926e-07,
|
| 3489 |
+
"loss": 1.173,
|
| 3490 |
+
"step": 4630
|
| 3491 |
+
},
|
| 3492 |
+
{
|
| 3493 |
+
"epoch": 0.30669574988432813,
|
| 3494 |
+
"grad_norm": 10.524628639221191,
|
| 3495 |
+
"learning_rate": 2.964890801882817e-07,
|
| 3496 |
+
"loss": 1.0992,
|
| 3497 |
+
"step": 4640
|
| 3498 |
+
},
|
| 3499 |
+
{
|
| 3500 |
+
"epoch": 0.3073567321039064,
|
| 3501 |
+
"grad_norm": 6.618716716766357,
|
| 3502 |
+
"learning_rate": 2.8031446349643393e-07,
|
| 3503 |
+
"loss": 1.1152,
|
| 3504 |
+
"step": 4650
|
| 3505 |
+
},
|
| 3506 |
+
{
|
| 3507 |
+
"epoch": 0.3073567321039064,
|
| 3508 |
+
"eval_loss": 1.2152043581008911,
|
| 3509 |
+
"eval_runtime": 53.4713,
|
| 3510 |
+
"eval_samples_per_second": 9.37,
|
| 3511 |
+
"eval_steps_per_second": 9.37,
|
| 3512 |
+
"step": 4650
|
| 3513 |
}
|
| 3514 |
],
|
| 3515 |
"logging_steps": 10,
|
|
|
|
| 3529 |
"attributes": {}
|
| 3530 |
}
|
| 3531 |
},
|
| 3532 |
+
"total_flos": 3.2041141329494016e+17,
|
| 3533 |
"train_batch_size": 4,
|
| 3534 |
"trial_name": null,
|
| 3535 |
"trial_params": null
|