Training in progress, step 2000, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 69782384
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dec94902aeef1da66fe3799f460774d7e11ff3ca1348ec1fd836a52e60e3a304
|
| 3 |
size 69782384
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 139790651
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:605837a2951e9cd7a6fa40ee23c3d5a25507180cd69e9506a92611de6b726860
|
| 3 |
size 139790651
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ff888a4ec4f2fab2bfd92e943e0a38d0b8f4e4883309963373b6809f52c08918
|
| 3 |
size 14645
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f59f59a7859ef7b5a7545b424e0b16de3a0435ca89fcc924c7795332a2791a7e
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": 1800,
|
| 3 |
"best_metric": 2.1885855197906494,
|
| 4 |
"best_model_checkpoint": "./outputs/checkpoint-1800",
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 200,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -332,6 +332,42 @@
|
|
| 332 |
"eval_samples_per_second": 81.85,
|
| 333 |
"eval_steps_per_second": 2.583,
|
| 334 |
"step": 1800
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 335 |
}
|
| 336 |
],
|
| 337 |
"logging_steps": 50,
|
|
@@ -346,7 +382,7 @@
|
|
| 346 |
"early_stopping_threshold": 0.0
|
| 347 |
},
|
| 348 |
"attributes": {
|
| 349 |
-
"early_stopping_patience_counter":
|
| 350 |
}
|
| 351 |
},
|
| 352 |
"TrainerControl": {
|
|
@@ -360,7 +396,7 @@
|
|
| 360 |
"attributes": {}
|
| 361 |
}
|
| 362 |
},
|
| 363 |
-
"total_flos": 5.
|
| 364 |
"train_batch_size": 32,
|
| 365 |
"trial_name": null,
|
| 366 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": 1800,
|
| 3 |
"best_metric": 2.1885855197906494,
|
| 4 |
"best_model_checkpoint": "./outputs/checkpoint-1800",
|
| 5 |
+
"epoch": 2.1715526601520088,
|
| 6 |
"eval_steps": 200,
|
| 7 |
+
"global_step": 2000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 332 |
"eval_samples_per_second": 81.85,
|
| 333 |
"eval_steps_per_second": 2.583,
|
| 334 |
"step": 1800
|
| 335 |
+
},
|
| 336 |
+
{
|
| 337 |
+
"epoch": 2.008686210640608,
|
| 338 |
+
"grad_norm": 0.9358265399932861,
|
| 339 |
+
"learning_rate": 5.097252214851365e-05,
|
| 340 |
+
"loss": 2.165,
|
| 341 |
+
"step": 1850
|
| 342 |
+
},
|
| 343 |
+
{
|
| 344 |
+
"epoch": 2.0629750271444083,
|
| 345 |
+
"grad_norm": 1.0949631929397583,
|
| 346 |
+
"learning_rate": 4.601121531384579e-05,
|
| 347 |
+
"loss": 2.1348,
|
| 348 |
+
"step": 1900
|
| 349 |
+
},
|
| 350 |
+
{
|
| 351 |
+
"epoch": 2.1172638436482085,
|
| 352 |
+
"grad_norm": 1.3909953832626343,
|
| 353 |
+
"learning_rate": 4.1230843398675555e-05,
|
| 354 |
+
"loss": 2.1399,
|
| 355 |
+
"step": 1950
|
| 356 |
+
},
|
| 357 |
+
{
|
| 358 |
+
"epoch": 2.1715526601520088,
|
| 359 |
+
"grad_norm": 1.2448049783706665,
|
| 360 |
+
"learning_rate": 3.664742706752925e-05,
|
| 361 |
+
"loss": 2.1417,
|
| 362 |
+
"step": 2000
|
| 363 |
+
},
|
| 364 |
+
{
|
| 365 |
+
"epoch": 2.1715526601520088,
|
| 366 |
+
"eval_loss": 2.194857120513916,
|
| 367 |
+
"eval_runtime": 7.3786,
|
| 368 |
+
"eval_samples_per_second": 81.587,
|
| 369 |
+
"eval_steps_per_second": 2.575,
|
| 370 |
+
"step": 2000
|
| 371 |
}
|
| 372 |
],
|
| 373 |
"logging_steps": 50,
|
|
|
|
| 382 |
"early_stopping_threshold": 0.0
|
| 383 |
},
|
| 384 |
"attributes": {
|
| 385 |
+
"early_stopping_patience_counter": 1
|
| 386 |
}
|
| 387 |
},
|
| 388 |
"TrainerControl": {
|
|
|
|
| 396 |
"attributes": {}
|
| 397 |
}
|
| 398 |
},
|
| 399 |
+
"total_flos": 5.602045311097897e+17,
|
| 400 |
"train_batch_size": 32,
|
| 401 |
"trial_name": null,
|
| 402 |
"trial_params": null
|