Training in progress, epoch 1, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step1750/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1750/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1750/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1750/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1750/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +82 -4
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 98088784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6f9b65681a5e4dae3d1166f15e110526d7dd0659622190adfab490c678c36f3c
|
| 3 |
size 98088784
|
last-checkpoint/global_step1750/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:542d95be70ce5fc2085034318fb8157da2f587a28efc4f0b879fe4ef234ce98e
|
| 3 |
+
size 73939813
|
last-checkpoint/global_step1750/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:086520196dd722d9adbfec2176094a5c72e623564176914198099d87b80b353b
|
| 3 |
+
size 73939813
|
last-checkpoint/global_step1750/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:22a3a5ec2308e2528cbb2fa87f1c254d0afe6bd346597b43f86db0582447ac94
|
| 3 |
+
size 73939877
|
last-checkpoint/global_step1750/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5296f2391ae5108481635ef6b1c252594b9a3ab0cab1ce10deb31f42ebd69da2
|
| 3 |
+
size 73939877
|
last-checkpoint/global_step1750/mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3237b7b70e874acc7f83a0a3e2726db60e3e2e90d5252f3bd1a7a619c17423de
|
| 3 |
+
size 564993061
|
last-checkpoint/latest
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
global_step1750
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:db55bbd76d21410901eef766f4ea27c457a7976afda6a56c3aec6194dbccd316
|
| 3 |
size 15429
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a1c93ee18f4dcb462dbaccd6716b53d2aaaac174ce6e476d05966a13ba91b15f
|
| 3 |
size 15429
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b4ec1c53fac56c1684c8a3e92bd85dc740d49fd5758c573121ce67476a27cef9
|
| 3 |
size 15429
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4b6d93badf06c5d380c3300205be40194653a28adcb4c6283dca4f75f7fb9d76
|
| 3 |
size 15429
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1401
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bbc8a195efc18da96bf16857984e4d60e7f36373f6730ddca95667a6b0c910ce
|
| 3 |
size 1401
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
-
"best_metric": 0.
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 1.
|
| 6 |
"eval_steps": 50,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -2660,6 +2660,84 @@
|
|
| 2660 |
"eval_samples_per_second": 124.447,
|
| 2661 |
"eval_steps_per_second": 15.564,
|
| 2662 |
"step": 1700
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2663 |
}
|
| 2664 |
],
|
| 2665 |
"logging_steps": 5,
|
|
@@ -2688,7 +2766,7 @@
|
|
| 2688 |
"attributes": {}
|
| 2689 |
}
|
| 2690 |
},
|
| 2691 |
-
"total_flos":
|
| 2692 |
"train_batch_size": 2,
|
| 2693 |
"trial_name": null,
|
| 2694 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
+
"best_metric": 0.6602269411087036,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 1.715248009797918,
|
| 6 |
"eval_steps": 50,
|
| 7 |
+
"global_step": 1750,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 2660 |
"eval_samples_per_second": 124.447,
|
| 2661 |
"eval_steps_per_second": 15.564,
|
| 2662 |
"step": 1700
|
| 2663 |
+
},
|
| 2664 |
+
{
|
| 2665 |
+
"epoch": 1.6711573790569503,
|
| 2666 |
+
"grad_norm": 0.203897163271904,
|
| 2667 |
+
"learning_rate": 8.399529398394039e-05,
|
| 2668 |
+
"loss": 0.6896,
|
| 2669 |
+
"step": 1705
|
| 2670 |
+
},
|
| 2671 |
+
{
|
| 2672 |
+
"epoch": 1.676056338028169,
|
| 2673 |
+
"grad_norm": 0.20551460981369019,
|
| 2674 |
+
"learning_rate": 8.384304537099798e-05,
|
| 2675 |
+
"loss": 0.6997,
|
| 2676 |
+
"step": 1710
|
| 2677 |
+
},
|
| 2678 |
+
{
|
| 2679 |
+
"epoch": 1.6809552969993877,
|
| 2680 |
+
"grad_norm": 0.24939224123954773,
|
| 2681 |
+
"learning_rate": 8.369050444776772e-05,
|
| 2682 |
+
"loss": 0.6784,
|
| 2683 |
+
"step": 1715
|
| 2684 |
+
},
|
| 2685 |
+
{
|
| 2686 |
+
"epoch": 1.6858542559706062,
|
| 2687 |
+
"grad_norm": 0.21644407510757446,
|
| 2688 |
+
"learning_rate": 8.353767278184362e-05,
|
| 2689 |
+
"loss": 0.6945,
|
| 2690 |
+
"step": 1720
|
| 2691 |
+
},
|
| 2692 |
+
{
|
| 2693 |
+
"epoch": 1.6907532149418247,
|
| 2694 |
+
"grad_norm": 0.19981370866298676,
|
| 2695 |
+
"learning_rate": 8.338455194380753e-05,
|
| 2696 |
+
"loss": 0.6901,
|
| 2697 |
+
"step": 1725
|
| 2698 |
+
},
|
| 2699 |
+
{
|
| 2700 |
+
"epoch": 1.6956521739130435,
|
| 2701 |
+
"grad_norm": 0.2333899885416031,
|
| 2702 |
+
"learning_rate": 8.323114350721291e-05,
|
| 2703 |
+
"loss": 0.6868,
|
| 2704 |
+
"step": 1730
|
| 2705 |
+
},
|
| 2706 |
+
{
|
| 2707 |
+
"epoch": 1.7005511328842622,
|
| 2708 |
+
"grad_norm": 0.2132989764213562,
|
| 2709 |
+
"learning_rate": 8.307744904856888e-05,
|
| 2710 |
+
"loss": 0.6934,
|
| 2711 |
+
"step": 1735
|
| 2712 |
+
},
|
| 2713 |
+
{
|
| 2714 |
+
"epoch": 1.7054500918554807,
|
| 2715 |
+
"grad_norm": 0.18624679744243622,
|
| 2716 |
+
"learning_rate": 8.292347014732376e-05,
|
| 2717 |
+
"loss": 0.6922,
|
| 2718 |
+
"step": 1740
|
| 2719 |
+
},
|
| 2720 |
+
{
|
| 2721 |
+
"epoch": 1.7103490508266992,
|
| 2722 |
+
"grad_norm": 0.20982161164283752,
|
| 2723 |
+
"learning_rate": 8.276920838584902e-05,
|
| 2724 |
+
"loss": 0.6768,
|
| 2725 |
+
"step": 1745
|
| 2726 |
+
},
|
| 2727 |
+
{
|
| 2728 |
+
"epoch": 1.715248009797918,
|
| 2729 |
+
"grad_norm": 0.20481140911579132,
|
| 2730 |
+
"learning_rate": 8.26146653494229e-05,
|
| 2731 |
+
"loss": 0.7054,
|
| 2732 |
+
"step": 1750
|
| 2733 |
+
},
|
| 2734 |
+
{
|
| 2735 |
+
"epoch": 1.715248009797918,
|
| 2736 |
+
"eval_loss": 0.6602269411087036,
|
| 2737 |
+
"eval_runtime": 15.6809,
|
| 2738 |
+
"eval_samples_per_second": 124.929,
|
| 2739 |
+
"eval_steps_per_second": 15.624,
|
| 2740 |
+
"step": 1750
|
| 2741 |
}
|
| 2742 |
],
|
| 2743 |
"logging_steps": 5,
|
|
|
|
| 2766 |
"attributes": {}
|
| 2767 |
}
|
| 2768 |
},
|
| 2769 |
+
"total_flos": 9.036926821219697e+17,
|
| 2770 |
"train_batch_size": 2,
|
| 2771 |
"trial_name": null,
|
| 2772 |
"trial_params": null
|