Training in progress, epoch 3, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step3101/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step3101/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step3101/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step3101/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step3101/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +82 -4
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 98088784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d3d3618a6549bfada4acfab50184a00e8169ef527f9fd2d6299b8877447758cb
|
| 3 |
size 98088784
|
last-checkpoint/global_step3101/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e4fb533299f62b9ce25f3ae0988e4c2e58b101d1c5c87106b853300fad700bcf
|
| 3 |
+
size 73939813
|
last-checkpoint/global_step3101/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6c6add0427eab5604df8457f0b0c96ae6e13a9298f74b97446cdd0a3e51c32ad
|
| 3 |
+
size 73939813
|
last-checkpoint/global_step3101/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:538f203bc9dec335c9ed3c93f40b10793d873a7700c2c3e88d7e5f29437e5d14
|
| 3 |
+
size 73939877
|
last-checkpoint/global_step3101/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1ffb96ff2f8ba9effc52557142b9549943034c6246447792f71e93b98f29a8bf
|
| 3 |
+
size 73939877
|
last-checkpoint/global_step3101/mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1c401fb0075566b0e43341ee59679950230715a00367a0f966497bf458e6ba4e
|
| 3 |
+
size 564993061
|
last-checkpoint/latest
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
global_step3101
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9c9f2639515e9a4dbb73442f973fea9b0c88c20b795038a946c56e0e2493ca27
|
| 3 |
size 15429
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ecc1dea28e987710088b449c94c516228a22c7362dc59c8c30f086a46639bae7
|
| 3 |
size 15429
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5887804b4b795f7a573283c1b112d762af0778415a90f4dd264b13d8ce6b6e73
|
| 3 |
size 15429
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d19ad5422c32a302e30ca2b15abb40a89c5c32a5565cad1734400f2ab8b8c060
|
| 3 |
size 15429
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1401
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:40dddd77817570d823fe2af41de3280b2532f2b28178ec1ba2cdd4222f46c420
|
| 3 |
size 1401
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
-
"best_metric": 0.
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 50,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -4766,6 +4766,84 @@
|
|
| 4766 |
"eval_samples_per_second": 126.335,
|
| 4767 |
"eval_steps_per_second": 15.8,
|
| 4768 |
"step": 3050
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4769 |
}
|
| 4770 |
],
|
| 4771 |
"logging_steps": 5,
|
|
@@ -4794,7 +4872,7 @@
|
|
| 4794 |
"attributes": {}
|
| 4795 |
}
|
| 4796 |
},
|
| 4797 |
-
"total_flos": 1.
|
| 4798 |
"train_batch_size": 2,
|
| 4799 |
"trial_name": null,
|
| 4800 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
+
"best_metric": 0.620843768119812,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 3.039191671769749,
|
| 6 |
"eval_steps": 50,
|
| 7 |
+
"global_step": 3100,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 4766 |
"eval_samples_per_second": 126.335,
|
| 4767 |
"eval_steps_per_second": 15.8,
|
| 4768 |
"step": 3050
|
| 4769 |
+
},
|
| 4770 |
+
{
|
| 4771 |
+
"epoch": 2.994488671157379,
|
| 4772 |
+
"grad_norm": 0.23352594673633575,
|
| 4773 |
+
"learning_rate": 3.780729435136962e-05,
|
| 4774 |
+
"loss": 0.6777,
|
| 4775 |
+
"step": 3055
|
| 4776 |
+
},
|
| 4777 |
+
{
|
| 4778 |
+
"epoch": 2.9993876301285978,
|
| 4779 |
+
"grad_norm": 0.19848263263702393,
|
| 4780 |
+
"learning_rate": 3.763898458660783e-05,
|
| 4781 |
+
"loss": 0.6618,
|
| 4782 |
+
"step": 3060
|
| 4783 |
+
},
|
| 4784 |
+
{
|
| 4785 |
+
"epoch": 3.0048989589712187,
|
| 4786 |
+
"grad_norm": 0.21553494036197662,
|
| 4787 |
+
"learning_rate": 3.747085732977055e-05,
|
| 4788 |
+
"loss": 0.7689,
|
| 4789 |
+
"step": 3065
|
| 4790 |
+
},
|
| 4791 |
+
{
|
| 4792 |
+
"epoch": 3.0097979179424375,
|
| 4793 |
+
"grad_norm": 0.23350001871585846,
|
| 4794 |
+
"learning_rate": 3.730291430862548e-05,
|
| 4795 |
+
"loss": 0.6462,
|
| 4796 |
+
"step": 3070
|
| 4797 |
+
},
|
| 4798 |
+
{
|
| 4799 |
+
"epoch": 3.0146968769136557,
|
| 4800 |
+
"grad_norm": 0.22312819957733154,
|
| 4801 |
+
"learning_rate": 3.713515724904701e-05,
|
| 4802 |
+
"loss": 0.629,
|
| 4803 |
+
"step": 3075
|
| 4804 |
+
},
|
| 4805 |
+
{
|
| 4806 |
+
"epoch": 3.0195958358848745,
|
| 4807 |
+
"grad_norm": 0.2268122434616089,
|
| 4808 |
+
"learning_rate": 3.696758787499846e-05,
|
| 4809 |
+
"loss": 0.6607,
|
| 4810 |
+
"step": 3080
|
| 4811 |
+
},
|
| 4812 |
+
{
|
| 4813 |
+
"epoch": 3.024494794856093,
|
| 4814 |
+
"grad_norm": 0.22305847704410553,
|
| 4815 |
+
"learning_rate": 3.6800207908514434e-05,
|
| 4816 |
+
"loss": 0.5964,
|
| 4817 |
+
"step": 3085
|
| 4818 |
+
},
|
| 4819 |
+
{
|
| 4820 |
+
"epoch": 3.029393753827312,
|
| 4821 |
+
"grad_norm": 0.23181577026844025,
|
| 4822 |
+
"learning_rate": 3.6633019069683054e-05,
|
| 4823 |
+
"loss": 0.6446,
|
| 4824 |
+
"step": 3090
|
| 4825 |
+
},
|
| 4826 |
+
{
|
| 4827 |
+
"epoch": 3.03429271279853,
|
| 4828 |
+
"grad_norm": 0.2492285519838333,
|
| 4829 |
+
"learning_rate": 3.646602307662833e-05,
|
| 4830 |
+
"loss": 0.6519,
|
| 4831 |
+
"step": 3095
|
| 4832 |
+
},
|
| 4833 |
+
{
|
| 4834 |
+
"epoch": 3.039191671769749,
|
| 4835 |
+
"grad_norm": 0.2302083671092987,
|
| 4836 |
+
"learning_rate": 3.629922164549246e-05,
|
| 4837 |
+
"loss": 0.647,
|
| 4838 |
+
"step": 3100
|
| 4839 |
+
},
|
| 4840 |
+
{
|
| 4841 |
+
"epoch": 3.039191671769749,
|
| 4842 |
+
"eval_loss": 0.620843768119812,
|
| 4843 |
+
"eval_runtime": 15.4094,
|
| 4844 |
+
"eval_samples_per_second": 127.13,
|
| 4845 |
+
"eval_steps_per_second": 15.899,
|
| 4846 |
+
"step": 3100
|
| 4847 |
}
|
| 4848 |
],
|
| 4849 |
"logging_steps": 5,
|
|
|
|
| 4872 |
"attributes": {}
|
| 4873 |
}
|
| 4874 |
},
|
| 4875 |
+
"total_flos": 1.5987517748506788e+18,
|
| 4876 |
"train_batch_size": 2,
|
| 4877 |
"trial_name": null,
|
| 4878 |
"trial_params": null
|