Training in progress, epoch 0, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step1220/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1220/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1220/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1220/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1220/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +40 -4
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 46708280
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2bcd1f99ca4639d5b7aeddd12e28d8ae4f66d17cb473b2d54aaeb23e2af3a90c
|
| 3 |
size 46708280
|
last-checkpoint/global_step1220/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:64ad49aa0764581eb18dba5361c652635c87dd910c3ec584db7af66e0ea850eb
|
| 3 |
+
size 35203941
|
last-checkpoint/global_step1220/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3df07287976a4ae0b3a6f366185192fae937f1a132311fc534bdd405e77802fd
|
| 3 |
+
size 35203941
|
last-checkpoint/global_step1220/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ed4b735fcb85fd83e7ed250f2192cc3b2e74a86ff1254c584c90e1170755aa9f
|
| 3 |
+
size 35204005
|
last-checkpoint/global_step1220/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:58543f4049c28a886ff21739d7917eb63cf2045ca1a0804d41ac22d0186f0d67
|
| 3 |
+
size 35204005
|
last-checkpoint/global_step1220/mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a37c86006de61d0be3b78586059e4d98ea657b269c132c722369e6e4bc65670f
|
| 3 |
+
size 46865049
|
last-checkpoint/latest
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
global_step1220
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:814c0560f0064c72cb05d6fdabce12f4ea60678ef99ae76b35beb0a5b61a746f
|
| 3 |
size 15429
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d55f786f89c825e7eccff5f5ad6f275ae339974e5fc7ca3adc2610c4ddc7dc29
|
| 3 |
size 15429
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f1050cb4078b8fb421e9d7cd11460b4f8b3150009a89652c0ce835de2f415292
|
| 3 |
size 15429
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:05a9ac1e1c4df9ca87b88219341350f97eb24cac74de6cbc9e039d0e0d106d3c
|
| 3 |
size 15429
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1401
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:99c32e39d9113c7bbf215e86b059c76588bc6462dcc095622f3886bf1b72f7af
|
| 3 |
size 1401
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
-
"best_metric": 1.
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 20,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -2168,6 +2168,42 @@
|
|
| 2168 |
"eval_samples_per_second": 512.638,
|
| 2169 |
"eval_steps_per_second": 16.021,
|
| 2170 |
"step": 1200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2171 |
}
|
| 2172 |
],
|
| 2173 |
"logging_steps": 5,
|
|
@@ -2196,7 +2232,7 @@
|
|
| 2196 |
"attributes": {}
|
| 2197 |
}
|
| 2198 |
},
|
| 2199 |
-
"total_flos": 5.
|
| 2200 |
"train_batch_size": 8,
|
| 2201 |
"trial_name": null,
|
| 2202 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
+
"best_metric": 1.0451515913009644,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.17368402320532442,
|
| 6 |
"eval_steps": 20,
|
| 7 |
+
"global_step": 1220,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 2168 |
"eval_samples_per_second": 512.638,
|
| 2169 |
"eval_steps_per_second": 16.021,
|
| 2170 |
"step": 1200
|
| 2171 |
+
},
|
| 2172 |
+
{
|
| 2173 |
+
"epoch": 0.1715485639036196,
|
| 2174 |
+
"grad_norm": 1.4823371171951294,
|
| 2175 |
+
"learning_rate": 2.8734766443620955e-05,
|
| 2176 |
+
"loss": 1.0662,
|
| 2177 |
+
"step": 1205
|
| 2178 |
+
},
|
| 2179 |
+
{
|
| 2180 |
+
"epoch": 0.17226038367085453,
|
| 2181 |
+
"grad_norm": 1.2662029266357422,
|
| 2182 |
+
"learning_rate": 2.8733780986581105e-05,
|
| 2183 |
+
"loss": 1.0515,
|
| 2184 |
+
"step": 1210
|
| 2185 |
+
},
|
| 2186 |
+
{
|
| 2187 |
+
"epoch": 0.17297220343808947,
|
| 2188 |
+
"grad_norm": 1.5725047588348389,
|
| 2189 |
+
"learning_rate": 2.8732791017215324e-05,
|
| 2190 |
+
"loss": 1.0703,
|
| 2191 |
+
"step": 1215
|
| 2192 |
+
},
|
| 2193 |
+
{
|
| 2194 |
+
"epoch": 0.17368402320532442,
|
| 2195 |
+
"grad_norm": 1.4703493118286133,
|
| 2196 |
+
"learning_rate": 2.8731796535835716e-05,
|
| 2197 |
+
"loss": 1.0047,
|
| 2198 |
+
"step": 1220
|
| 2199 |
+
},
|
| 2200 |
+
{
|
| 2201 |
+
"epoch": 0.17368402320532442,
|
| 2202 |
+
"eval_loss": 1.0451515913009644,
|
| 2203 |
+
"eval_runtime": 194.8815,
|
| 2204 |
+
"eval_samples_per_second": 512.614,
|
| 2205 |
+
"eval_steps_per_second": 16.02,
|
| 2206 |
+
"step": 1220
|
| 2207 |
}
|
| 2208 |
],
|
| 2209 |
"logging_steps": 5,
|
|
|
|
| 2232 |
"attributes": {}
|
| 2233 |
}
|
| 2234 |
},
|
| 2235 |
+
"total_flos": 5.974615545851412e+17,
|
| 2236 |
"train_batch_size": 8,
|
| 2237 |
"trial_name": null,
|
| 2238 |
"trial_params": null
|