Training in progress, step 120, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step120/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step120/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step120/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step120/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step120/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +78 -6
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 80014016
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e07bb356c3c42e5cdc07d10d2311f6b5d583b9c0243d40576869a8a8468b6d98
|
| 3 |
size 80014016
|
last-checkpoint/global_step120/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:66dc06d93f59c002bd6980986bbd53dab716b8588332f09e290885a539e536f3
|
| 3 |
+
size 60296272
|
last-checkpoint/global_step120/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:274b6848025365e3dda92951d0ca3e5d15f264a98269a8c215418b1e6630f50c
|
| 3 |
+
size 60296400
|
last-checkpoint/global_step120/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e92415e1541c796ef60a91fd9fc6f046a75c96bd56685c357cc821aba8e9ad88
|
| 3 |
+
size 60296400
|
last-checkpoint/global_step120/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:42e7578deced32b54388e031ab71404067972e3c7e6413672d16489b6bb4b9cb
|
| 3 |
+
size 60296400
|
last-checkpoint/global_step120/mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0051d479ecabaf7850bc6a78ce63326f1ec673808c117ceaae6b5af296aa15b8
|
| 3 |
+
size 80296428
|
last-checkpoint/latest
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
global_step120
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ca446724d1ebce77ccae7f35340aabb59e5bca2ba79e103237fd5f8b74b534ac
|
| 3 |
size 15024
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:72284f3bf1ea5778c38096c11524890c08f7bd83e9ef49537cb678a37a80dc25
|
| 3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4a8554acf33597a260ef009c9e86cc0718a7640fdf53a7e753811647743bf70a
|
| 3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5b0f0993e706c9ca5d8f708ef0f01e07d0edb3dd3e3db326b06ad6f26f3cf62c
|
| 3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:31d2e54503626585fe3bd1382687ba707fdc17dcc88507487c697e511203f899
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"best_global_step":
|
| 3 |
-
"best_metric": 1.
|
| 4 |
-
"best_model_checkpoint": "miner_id_24/checkpoint-
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 20,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -160,6 +160,78 @@
|
|
| 160 |
"eval_samples_per_second": 14.47,
|
| 161 |
"eval_steps_per_second": 0.907,
|
| 162 |
"step": 80
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
}
|
| 164 |
],
|
| 165 |
"logging_steps": 5,
|
|
@@ -188,7 +260,7 @@
|
|
| 188 |
"attributes": {}
|
| 189 |
}
|
| 190 |
},
|
| 191 |
-
"total_flos":
|
| 192 |
"train_batch_size": 4,
|
| 193 |
"trial_name": null,
|
| 194 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_global_step": 120,
|
| 3 |
+
"best_metric": 1.1266472339630127,
|
| 4 |
+
"best_model_checkpoint": "miner_id_24/checkpoint-120",
|
| 5 |
+
"epoch": 0.9876543209876543,
|
| 6 |
"eval_steps": 20,
|
| 7 |
+
"global_step": 120,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 160 |
"eval_samples_per_second": 14.47,
|
| 161 |
"eval_steps_per_second": 0.907,
|
| 162 |
"step": 80
|
| 163 |
+
},
|
| 164 |
+
{
|
| 165 |
+
"epoch": 0.6995884773662552,
|
| 166 |
+
"grad_norm": 0.06558868288993835,
|
| 167 |
+
"learning_rate": 0.00011200000000000001,
|
| 168 |
+
"loss": 1.1719,
|
| 169 |
+
"step": 85
|
| 170 |
+
},
|
| 171 |
+
{
|
| 172 |
+
"epoch": 0.7407407407407407,
|
| 173 |
+
"grad_norm": 0.06846272945404053,
|
| 174 |
+
"learning_rate": 0.00011866666666666669,
|
| 175 |
+
"loss": 1.1555,
|
| 176 |
+
"step": 90
|
| 177 |
+
},
|
| 178 |
+
{
|
| 179 |
+
"epoch": 0.7818930041152263,
|
| 180 |
+
"grad_norm": 0.07038144767284393,
|
| 181 |
+
"learning_rate": 0.00012533333333333334,
|
| 182 |
+
"loss": 1.1683,
|
| 183 |
+
"step": 95
|
| 184 |
+
},
|
| 185 |
+
{
|
| 186 |
+
"epoch": 0.823045267489712,
|
| 187 |
+
"grad_norm": 0.07254977524280548,
|
| 188 |
+
"learning_rate": 0.000132,
|
| 189 |
+
"loss": 1.1592,
|
| 190 |
+
"step": 100
|
| 191 |
+
},
|
| 192 |
+
{
|
| 193 |
+
"epoch": 0.823045267489712,
|
| 194 |
+
"eval_loss": 1.148273229598999,
|
| 195 |
+
"eval_runtime": 274.3966,
|
| 196 |
+
"eval_samples_per_second": 14.413,
|
| 197 |
+
"eval_steps_per_second": 0.904,
|
| 198 |
+
"step": 100
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"epoch": 0.8641975308641975,
|
| 202 |
+
"grad_norm": 0.07729992270469666,
|
| 203 |
+
"learning_rate": 0.00013866666666666669,
|
| 204 |
+
"loss": 1.1482,
|
| 205 |
+
"step": 105
|
| 206 |
+
},
|
| 207 |
+
{
|
| 208 |
+
"epoch": 0.9053497942386831,
|
| 209 |
+
"grad_norm": 0.08038458973169327,
|
| 210 |
+
"learning_rate": 0.00014533333333333333,
|
| 211 |
+
"loss": 1.1458,
|
| 212 |
+
"step": 110
|
| 213 |
+
},
|
| 214 |
+
{
|
| 215 |
+
"epoch": 0.9465020576131687,
|
| 216 |
+
"grad_norm": 0.08932825922966003,
|
| 217 |
+
"learning_rate": 0.000152,
|
| 218 |
+
"loss": 1.1384,
|
| 219 |
+
"step": 115
|
| 220 |
+
},
|
| 221 |
+
{
|
| 222 |
+
"epoch": 0.9876543209876543,
|
| 223 |
+
"grad_norm": 0.08660374581813812,
|
| 224 |
+
"learning_rate": 0.00015866666666666668,
|
| 225 |
+
"loss": 1.129,
|
| 226 |
+
"step": 120
|
| 227 |
+
},
|
| 228 |
+
{
|
| 229 |
+
"epoch": 0.9876543209876543,
|
| 230 |
+
"eval_loss": 1.1266472339630127,
|
| 231 |
+
"eval_runtime": 272.2952,
|
| 232 |
+
"eval_samples_per_second": 14.525,
|
| 233 |
+
"eval_steps_per_second": 0.911,
|
| 234 |
+
"step": 120
|
| 235 |
}
|
| 236 |
],
|
| 237 |
"logging_steps": 5,
|
|
|
|
| 260 |
"attributes": {}
|
| 261 |
}
|
| 262 |
},
|
| 263 |
+
"total_flos": 2.5093043373987594e+18,
|
| 264 |
"train_batch_size": 4,
|
| 265 |
"trial_name": null,
|
| 266 |
"trial_params": null
|