Training in progress, epoch 30, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 990185320
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:efa0d3a9c1506002f928fecb048a5cd100d53c2f5b5dbd9403c23027a7acb60c
|
| 3 |
size 990185320
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1980541387
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0fef191fd03c6714ffbb4b03d87e8dd4d98235051bcb5b2f7eb2e8394c2e9665
|
| 3 |
size 1980541387
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9044f2a8508562d34a024d3e8ad4386288255ee124dc5623e57819532eef2e88
|
| 3 |
size 14645
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ecea6d0fd3948e5b4ccfc315e9a77fbe98506ece50162f5206e2243ebb2a7de9
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -3362,6 +3362,119 @@
|
|
| 3362 |
"eval_samples_per_second": 22.138,
|
| 3363 |
"eval_steps_per_second": 2.767,
|
| 3364 |
"step": 44631
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3365 |
}
|
| 3366 |
],
|
| 3367 |
"logging_steps": 100,
|
|
@@ -3376,12 +3489,12 @@
|
|
| 3376 |
"should_evaluate": false,
|
| 3377 |
"should_log": false,
|
| 3378 |
"should_save": true,
|
| 3379 |
-
"should_training_stop":
|
| 3380 |
},
|
| 3381 |
"attributes": {}
|
| 3382 |
}
|
| 3383 |
},
|
| 3384 |
-
"total_flos":
|
| 3385 |
"train_batch_size": 8,
|
| 3386 |
"trial_name": null,
|
| 3387 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 30.0,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 46170,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 3362 |
"eval_samples_per_second": 22.138,
|
| 3363 |
"eval_steps_per_second": 2.767,
|
| 3364 |
"step": 44631
|
| 3365 |
+
},
|
| 3366 |
+
{
|
| 3367 |
+
"epoch": 29.044834307992204,
|
| 3368 |
+
"grad_norm": 5.871100425720215,
|
| 3369 |
+
"learning_rate": 1.593025774312324e-06,
|
| 3370 |
+
"loss": 1.4208,
|
| 3371 |
+
"step": 44700
|
| 3372 |
+
},
|
| 3373 |
+
{
|
| 3374 |
+
"epoch": 29.109811565951915,
|
| 3375 |
+
"grad_norm": 4.591679096221924,
|
| 3376 |
+
"learning_rate": 1.4847303443794673e-06,
|
| 3377 |
+
"loss": 1.3698,
|
| 3378 |
+
"step": 44800
|
| 3379 |
+
},
|
| 3380 |
+
{
|
| 3381 |
+
"epoch": 29.17478882391163,
|
| 3382 |
+
"grad_norm": 4.2391157150268555,
|
| 3383 |
+
"learning_rate": 1.3764349144466105e-06,
|
| 3384 |
+
"loss": 1.411,
|
| 3385 |
+
"step": 44900
|
| 3386 |
+
},
|
| 3387 |
+
{
|
| 3388 |
+
"epoch": 29.239766081871345,
|
| 3389 |
+
"grad_norm": 5.3565239906311035,
|
| 3390 |
+
"learning_rate": 1.2681394845137535e-06,
|
| 3391 |
+
"loss": 1.4736,
|
| 3392 |
+
"step": 45000
|
| 3393 |
+
},
|
| 3394 |
+
{
|
| 3395 |
+
"epoch": 29.30474333983106,
|
| 3396 |
+
"grad_norm": 3.925321578979492,
|
| 3397 |
+
"learning_rate": 1.1598440545808967e-06,
|
| 3398 |
+
"loss": 1.4591,
|
| 3399 |
+
"step": 45100
|
| 3400 |
+
},
|
| 3401 |
+
{
|
| 3402 |
+
"epoch": 29.369720597790774,
|
| 3403 |
+
"grad_norm": 4.0369462966918945,
|
| 3404 |
+
"learning_rate": 1.05154862464804e-06,
|
| 3405 |
+
"loss": 1.4353,
|
| 3406 |
+
"step": 45200
|
| 3407 |
+
},
|
| 3408 |
+
{
|
| 3409 |
+
"epoch": 29.43469785575049,
|
| 3410 |
+
"grad_norm": 6.875803470611572,
|
| 3411 |
+
"learning_rate": 9.43253194715183e-07,
|
| 3412 |
+
"loss": 1.4348,
|
| 3413 |
+
"step": 45300
|
| 3414 |
+
},
|
| 3415 |
+
{
|
| 3416 |
+
"epoch": 29.4996751137102,
|
| 3417 |
+
"grad_norm": 5.557791233062744,
|
| 3418 |
+
"learning_rate": 8.349577647823262e-07,
|
| 3419 |
+
"loss": 1.3936,
|
| 3420 |
+
"step": 45400
|
| 3421 |
+
},
|
| 3422 |
+
{
|
| 3423 |
+
"epoch": 29.564652371669915,
|
| 3424 |
+
"grad_norm": 2.878941059112549,
|
| 3425 |
+
"learning_rate": 7.266623348494695e-07,
|
| 3426 |
+
"loss": 1.4049,
|
| 3427 |
+
"step": 45500
|
| 3428 |
+
},
|
| 3429 |
+
{
|
| 3430 |
+
"epoch": 29.62962962962963,
|
| 3431 |
+
"grad_norm": 4.448305130004883,
|
| 3432 |
+
"learning_rate": 6.183669049166126e-07,
|
| 3433 |
+
"loss": 1.4117,
|
| 3434 |
+
"step": 45600
|
| 3435 |
+
},
|
| 3436 |
+
{
|
| 3437 |
+
"epoch": 29.694606887589345,
|
| 3438 |
+
"grad_norm": 4.321474075317383,
|
| 3439 |
+
"learning_rate": 5.100714749837557e-07,
|
| 3440 |
+
"loss": 1.4495,
|
| 3441 |
+
"step": 45700
|
| 3442 |
+
},
|
| 3443 |
+
{
|
| 3444 |
+
"epoch": 29.75958414554906,
|
| 3445 |
+
"grad_norm": 5.657812118530273,
|
| 3446 |
+
"learning_rate": 4.0177604505089883e-07,
|
| 3447 |
+
"loss": 1.3955,
|
| 3448 |
+
"step": 45800
|
| 3449 |
+
},
|
| 3450 |
+
{
|
| 3451 |
+
"epoch": 29.82456140350877,
|
| 3452 |
+
"grad_norm": 4.73406457901001,
|
| 3453 |
+
"learning_rate": 2.93480615118042e-07,
|
| 3454 |
+
"loss": 1.4954,
|
| 3455 |
+
"step": 45900
|
| 3456 |
+
},
|
| 3457 |
+
{
|
| 3458 |
+
"epoch": 29.889538661468485,
|
| 3459 |
+
"grad_norm": 3.9184389114379883,
|
| 3460 |
+
"learning_rate": 1.851851851851852e-07,
|
| 3461 |
+
"loss": 1.463,
|
| 3462 |
+
"step": 46000
|
| 3463 |
+
},
|
| 3464 |
+
{
|
| 3465 |
+
"epoch": 29.9545159194282,
|
| 3466 |
+
"grad_norm": 3.517953872680664,
|
| 3467 |
+
"learning_rate": 7.688975525232836e-08,
|
| 3468 |
+
"loss": 1.4204,
|
| 3469 |
+
"step": 46100
|
| 3470 |
+
},
|
| 3471 |
+
{
|
| 3472 |
+
"epoch": 30.0,
|
| 3473 |
+
"eval_loss": 1.3695261478424072,
|
| 3474 |
+
"eval_runtime": 62.0405,
|
| 3475 |
+
"eval_samples_per_second": 22.05,
|
| 3476 |
+
"eval_steps_per_second": 2.756,
|
| 3477 |
+
"step": 46170
|
| 3478 |
}
|
| 3479 |
],
|
| 3480 |
"logging_steps": 100,
|
|
|
|
| 3489 |
"should_evaluate": false,
|
| 3490 |
"should_log": false,
|
| 3491 |
"should_save": true,
|
| 3492 |
+
"should_training_stop": true
|
| 3493 |
},
|
| 3494 |
"attributes": {}
|
| 3495 |
}
|
| 3496 |
},
|
| 3497 |
+
"total_flos": 7.195641550995456e+16,
|
| 3498 |
"train_batch_size": 8,
|
| 3499 |
"trial_name": null,
|
| 3500 |
"trial_params": null
|