Training in progress, epoch 1, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step1600/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1600/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1600/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1600/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1600/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +82 -4
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 98088784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a0b2192cab039402b759c7755739934206cf2585c7ded8de3eba8e8a4711f0f2
|
| 3 |
size 98088784
|
last-checkpoint/global_step1600/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:30e6242c13ae98b8a5cbe60e5ad651c403bb13deedea6b758ac3fc066fc1a3ac
|
| 3 |
+
size 73939813
|
last-checkpoint/global_step1600/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:03ed76c8aaa1ef74b40fbc859d853a5c8344f37e789b087826352ff27c85296a
|
| 3 |
+
size 73939813
|
last-checkpoint/global_step1600/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:384d53dcec29199c1a7bf744018c71f65b4a798b58a2526e0a6f485dc25b1cad
|
| 3 |
+
size 73939877
|
last-checkpoint/global_step1600/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e006e5a3030d4fc513e5c423de939f5b9cd0faa645e0b0ad363d85078ceccd8e
|
| 3 |
+
size 73939877
|
last-checkpoint/global_step1600/mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:11bbc3b21a3d45ce24346133fd20140bab87f069916c01102d188f1090ea0202
|
| 3 |
+
size 564993061
|
last-checkpoint/latest
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
global_step1600
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1eedf98ea9975d3d0f462291ea7b85b1da5289375a42dca5aaa06c6d74ce34c5
|
| 3 |
size 15429
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c1d8118304cff88474c86198c7f5aa76463f040ece3bc21b07aecbb56d4fa64a
|
| 3 |
size 15429
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:429e5f6db3d40572a6c2433c9bdbbba576389574bcdd6f230c7b20ea60fcad6b
|
| 3 |
size 15429
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:25916e2cde20cc021d3d06b16632aaf89d5216f10390eef1707ef1a019a03407
|
| 3 |
size 15429
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1401
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5aa753350ab43e2a824d8e89dd96b9a3ddede15e688d15c87d248607ff08488a
|
| 3 |
size 1401
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
-
"best_metric": 0.
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 1.
|
| 6 |
"eval_steps": 50,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -2426,6 +2426,84 @@
|
|
| 2426 |
"eval_samples_per_second": 126.077,
|
| 2427 |
"eval_steps_per_second": 15.768,
|
| 2428 |
"step": 1550
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2429 |
}
|
| 2430 |
],
|
| 2431 |
"logging_steps": 5,
|
|
@@ -2454,7 +2532,7 @@
|
|
| 2454 |
"attributes": {}
|
| 2455 |
}
|
| 2456 |
},
|
| 2457 |
-
"total_flos":
|
| 2458 |
"train_batch_size": 2,
|
| 2459 |
"trial_name": null,
|
| 2460 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
+
"best_metric": 0.6650952696800232,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 1.5682792406613595,
|
| 6 |
"eval_steps": 50,
|
| 7 |
+
"global_step": 1600,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 2426 |
"eval_samples_per_second": 126.077,
|
| 2427 |
"eval_steps_per_second": 15.768,
|
| 2428 |
"step": 1550
|
| 2429 |
+
},
|
| 2430 |
+
{
|
| 2431 |
+
"epoch": 1.524188609920392,
|
| 2432 |
+
"grad_norm": 0.20915450155735016,
|
| 2433 |
+
"learning_rate": 8.841917952432193e-05,
|
| 2434 |
+
"loss": 0.7048,
|
| 2435 |
+
"step": 1555
|
| 2436 |
+
},
|
| 2437 |
+
{
|
| 2438 |
+
"epoch": 1.5290875688916106,
|
| 2439 |
+
"grad_norm": 0.21618790924549103,
|
| 2440 |
+
"learning_rate": 8.827641367989242e-05,
|
| 2441 |
+
"loss": 0.6895,
|
| 2442 |
+
"step": 1560
|
| 2443 |
+
},
|
| 2444 |
+
{
|
| 2445 |
+
"epoch": 1.533986527862829,
|
| 2446 |
+
"grad_norm": 0.20739570260047913,
|
| 2447 |
+
"learning_rate": 8.813330996545632e-05,
|
| 2448 |
+
"loss": 0.712,
|
| 2449 |
+
"step": 1565
|
| 2450 |
+
},
|
| 2451 |
+
{
|
| 2452 |
+
"epoch": 1.5388854868340478,
|
| 2453 |
+
"grad_norm": 0.2209644615650177,
|
| 2454 |
+
"learning_rate": 8.798986985162568e-05,
|
| 2455 |
+
"loss": 0.6987,
|
| 2456 |
+
"step": 1570
|
| 2457 |
+
},
|
| 2458 |
+
{
|
| 2459 |
+
"epoch": 1.5437844458052665,
|
| 2460 |
+
"grad_norm": 0.20145706832408905,
|
| 2461 |
+
"learning_rate": 8.784609481246963e-05,
|
| 2462 |
+
"loss": 0.71,
|
| 2463 |
+
"step": 1575
|
| 2464 |
+
},
|
| 2465 |
+
{
|
| 2466 |
+
"epoch": 1.548683404776485,
|
| 2467 |
+
"grad_norm": 0.20716360211372375,
|
| 2468 |
+
"learning_rate": 8.770198632549912e-05,
|
| 2469 |
+
"loss": 0.6984,
|
| 2470 |
+
"step": 1580
|
| 2471 |
+
},
|
| 2472 |
+
{
|
| 2473 |
+
"epoch": 1.5535823637477035,
|
| 2474 |
+
"grad_norm": 0.1935468167066574,
|
| 2475 |
+
"learning_rate": 8.755754587165184e-05,
|
| 2476 |
+
"loss": 0.701,
|
| 2477 |
+
"step": 1585
|
| 2478 |
+
},
|
| 2479 |
+
{
|
| 2480 |
+
"epoch": 1.5584813227189223,
|
| 2481 |
+
"grad_norm": 0.19698968529701233,
|
| 2482 |
+
"learning_rate": 8.741277493527693e-05,
|
| 2483 |
+
"loss": 0.6829,
|
| 2484 |
+
"step": 1590
|
| 2485 |
+
},
|
| 2486 |
+
{
|
| 2487 |
+
"epoch": 1.563380281690141,
|
| 2488 |
+
"grad_norm": 0.20820119976997375,
|
| 2489 |
+
"learning_rate": 8.726767500411974e-05,
|
| 2490 |
+
"loss": 0.6963,
|
| 2491 |
+
"step": 1595
|
| 2492 |
+
},
|
| 2493 |
+
{
|
| 2494 |
+
"epoch": 1.5682792406613595,
|
| 2495 |
+
"grad_norm": 0.18384264409542084,
|
| 2496 |
+
"learning_rate": 8.712224756930659e-05,
|
| 2497 |
+
"loss": 0.7072,
|
| 2498 |
+
"step": 1600
|
| 2499 |
+
},
|
| 2500 |
+
{
|
| 2501 |
+
"epoch": 1.5682792406613595,
|
| 2502 |
+
"eval_loss": 0.6650952696800232,
|
| 2503 |
+
"eval_runtime": 15.609,
|
| 2504 |
+
"eval_samples_per_second": 125.505,
|
| 2505 |
+
"eval_steps_per_second": 15.696,
|
| 2506 |
+
"step": 1600
|
| 2507 |
}
|
| 2508 |
],
|
| 2509 |
"logging_steps": 5,
|
|
|
|
| 2532 |
"attributes": {}
|
| 2533 |
}
|
| 2534 |
},
|
| 2535 |
+
"total_flos": 8.252213790611866e+17,
|
| 2536 |
"train_batch_size": 2,
|
| 2537 |
"trial_name": null,
|
| 2538 |
"trial_params": null
|