Training in progress, step 2100, checkpoint
Browse files- last-checkpoint/model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +21 -6
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 136000488
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cd05797eba4af2ca90d5c8aedc8a2a141b146602a03127e89e52aeaa78c51cb0
|
| 3 |
size 136000488
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 268176506
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5505d7094552ed7661bfc40965c065158aa0046fb8d6f07d161f457d37e1da5f
|
| 3 |
size 268176506
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:51d51837dd781f74c987a30a788fb34245746bf69bd7d3e79d3c65a22612eb23
|
| 3 |
size 15024
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1cfd5e7fe0683153dd34505da9c613acbbf24ffe8ad301d2cfe6618d63d98896
|
| 3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cfc574ffdad315f35625e325f3592a5576c7dfb0a7edc69111f4fc7430d5f8bd
|
| 3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:207a31c0d5216576c258e1e019adc0b0471874547e0afd1e435ea0357317731f
|
| 3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:62f7b847089c23254653d5114a76b90d4e846cc6929c539b8969df05d9501e6f
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
-
"best_metric": 0.
|
| 3 |
-
"best_model_checkpoint": "mgh6/TCS_MLM/checkpoint-
|
| 4 |
-
"epoch": 2.
|
| 5 |
"eval_steps": 100,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -307,6 +307,21 @@
|
|
| 307 |
"eval_samples_per_second": 890.609,
|
| 308 |
"eval_steps_per_second": 3.599,
|
| 309 |
"step": 2000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 310 |
}
|
| 311 |
],
|
| 312 |
"logging_steps": 100,
|
|
@@ -321,7 +336,7 @@
|
|
| 321 |
"early_stopping_threshold": 0.0
|
| 322 |
},
|
| 323 |
"attributes": {
|
| 324 |
-
"early_stopping_patience_counter":
|
| 325 |
}
|
| 326 |
},
|
| 327 |
"TrainerControl": {
|
|
@@ -335,7 +350,7 @@
|
|
| 335 |
"attributes": {}
|
| 336 |
}
|
| 337 |
},
|
| 338 |
-
"total_flos": 7.
|
| 339 |
"train_batch_size": 64,
|
| 340 |
"trial_name": null,
|
| 341 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_metric": 0.8368203639984131,
|
| 3 |
+
"best_model_checkpoint": "mgh6/TCS_MLM/checkpoint-2100",
|
| 4 |
+
"epoch": 2.8112449799196786,
|
| 5 |
"eval_steps": 100,
|
| 6 |
+
"global_step": 2100,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 307 |
"eval_samples_per_second": 890.609,
|
| 308 |
"eval_steps_per_second": 3.599,
|
| 309 |
"step": 2000
|
| 310 |
+
},
|
| 311 |
+
{
|
| 312 |
+
"epoch": 2.8112449799196786,
|
| 313 |
+
"grad_norm": 0.2042614370584488,
|
| 314 |
+
"learning_rate": 0.000718875502008032,
|
| 315 |
+
"loss": 0.8372,
|
| 316 |
+
"step": 2100
|
| 317 |
+
},
|
| 318 |
+
{
|
| 319 |
+
"epoch": 2.8112449799196786,
|
| 320 |
+
"eval_loss": 0.8368203639984131,
|
| 321 |
+
"eval_runtime": 6.3674,
|
| 322 |
+
"eval_samples_per_second": 893.931,
|
| 323 |
+
"eval_steps_per_second": 3.612,
|
| 324 |
+
"step": 2100
|
| 325 |
}
|
| 326 |
],
|
| 327 |
"logging_steps": 100,
|
|
|
|
| 336 |
"early_stopping_threshold": 0.0
|
| 337 |
},
|
| 338 |
"attributes": {
|
| 339 |
+
"early_stopping_patience_counter": 0
|
| 340 |
}
|
| 341 |
},
|
| 342 |
"TrainerControl": {
|
|
|
|
| 350 |
"attributes": {}
|
| 351 |
}
|
| 352 |
},
|
| 353 |
+
"total_flos": 7.6255768608768e+16,
|
| 354 |
"train_batch_size": 64,
|
| 355 |
"trial_name": null,
|
| 356 |
"trial_params": null
|