Training in progress, step 1353, checkpoint
Browse files- last-checkpoint/model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +36 -4
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 735396724
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4a3b8dae878391ccdde5efef65aac68f898765585be40fdedb1763b5c32d3572
|
| 3 |
size 735396724
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1470915147
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bd47712bd98321074a343390bc24e5d74bdfe0e54b79a2fb374b6c8370fe9155
|
| 3 |
size 1470915147
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3fe140131aade8de244a1f9906c0ea077cbc86618e1ec76642211d944eabdc9e
|
| 3 |
size 16389
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:692b76aced1495252a289c9275f5bfacc17dbd8baed1fc23a89e2669bd6a6ee6
|
| 3 |
size 16389
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6d64529358832803e0826e10a5306180840848f0d71ee25b721a5947710297c9
|
| 3 |
size 16389
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:14bbf33bb5acf93fd7470dc8fd65bac5bb46efac5fccaef5be2ca4f3df4df544
|
| 3 |
size 16389
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cf55bbcce84afd6678660c3daaf444f06b34e64692fc3c5dc1f70cc20cbf3356
|
| 3 |
size 16389
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8ad6f352ce2f4372e730c435e79ff404e511e04b4848728adb550294d444318c
|
| 3 |
size 16389
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:253ac58e55fb568050459353a65b6484e3db6a1e8efc73dff58aa0168523c749
|
| 3 |
size 16389
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0430e7d456ad88d276ee666d3bba222029b41da9e47292339f857192e8367cdd
|
| 3 |
size 16389
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5a2b98aadf3dee607faf6695c52919e41f78eab249593a5656849f6f096106f2
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": 1107,
|
| 3 |
"best_metric": 0.7315732624029576,
|
| 4 |
"best_model_checkpoint": "model/checkpoint-1107",
|
| 5 |
-
"epoch": 2.
|
| 6 |
"eval_steps": 123,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -286,6 +286,38 @@
|
|
| 286 |
"eval_samples_per_second": 1989.845,
|
| 287 |
"eval_steps_per_second": 31.095,
|
| 288 |
"step": 1230
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 289 |
}
|
| 290 |
],
|
| 291 |
"logging_steps": 50,
|
|
@@ -300,12 +332,12 @@
|
|
| 300 |
"should_evaluate": false,
|
| 301 |
"should_log": false,
|
| 302 |
"should_save": true,
|
| 303 |
-
"should_training_stop":
|
| 304 |
},
|
| 305 |
"attributes": {}
|
| 306 |
}
|
| 307 |
},
|
| 308 |
-
"total_flos": 1.
|
| 309 |
"train_batch_size": 256,
|
| 310 |
"trial_name": null,
|
| 311 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": 1107,
|
| 3 |
"best_metric": 0.7315732624029576,
|
| 4 |
"best_model_checkpoint": "model/checkpoint-1107",
|
| 5 |
+
"epoch": 2.766871165644172,
|
| 6 |
"eval_steps": 123,
|
| 7 |
+
"global_step": 1353,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 286 |
"eval_samples_per_second": 1989.845,
|
| 287 |
"eval_steps_per_second": 31.095,
|
| 288 |
"step": 1230
|
| 289 |
+
},
|
| 290 |
+
{
|
| 291 |
+
"epoch": 2.556237218813906,
|
| 292 |
+
"grad_norm": 0.23193299770355225,
|
| 293 |
+
"learning_rate": 1.0139121293640819e-05,
|
| 294 |
+
"loss": 0.0883,
|
| 295 |
+
"step": 1250
|
| 296 |
+
},
|
| 297 |
+
{
|
| 298 |
+
"epoch": 2.658486707566462,
|
| 299 |
+
"grad_norm": 0.22019708156585693,
|
| 300 |
+
"learning_rate": 9.476860969626324e-06,
|
| 301 |
+
"loss": 0.0892,
|
| 302 |
+
"step": 1300
|
| 303 |
+
},
|
| 304 |
+
{
|
| 305 |
+
"epoch": 2.7607361963190185,
|
| 306 |
+
"grad_norm": 0.21104931831359863,
|
| 307 |
+
"learning_rate": 8.816895921614581e-06,
|
| 308 |
+
"loss": 0.0867,
|
| 309 |
+
"step": 1350
|
| 310 |
+
},
|
| 311 |
+
{
|
| 312 |
+
"epoch": 2.766871165644172,
|
| 313 |
+
"eval_entity_f1": 0.7256578908461596,
|
| 314 |
+
"eval_entity_precision": 0.6445169298121908,
|
| 315 |
+
"eval_entity_recall": 0.8729698187830259,
|
| 316 |
+
"eval_loss": 0.0912156030535698,
|
| 317 |
+
"eval_runtime": 75.6029,
|
| 318 |
+
"eval_samples_per_second": 1984.052,
|
| 319 |
+
"eval_steps_per_second": 31.004,
|
| 320 |
+
"step": 1353
|
| 321 |
}
|
| 322 |
],
|
| 323 |
"logging_steps": 50,
|
|
|
|
| 332 |
"should_evaluate": false,
|
| 333 |
"should_log": false,
|
| 334 |
"should_save": true,
|
| 335 |
+
"should_training_stop": true
|
| 336 |
},
|
| 337 |
"attributes": {}
|
| 338 |
}
|
| 339 |
},
|
| 340 |
+
"total_flos": 1.7151309741922714e+17,
|
| 341 |
"train_batch_size": 256,
|
| 342 |
"trial_name": null,
|
| 343 |
"trial_params": null
|