Training in progress, step 2200, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 69782384
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a227244b79f305f140948c1079fcdc545f071391bb7e4ff9cf542a898d157d1c
|
| 3 |
size 69782384
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 139790651
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f9e0d7d55f9a230dad78340581c7686c5db109d5d23cb37fd0af07013b1770c4
|
| 3 |
size 139790651
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:92bebe68c4903b16edae13a87b639062f86297d20f12bfc3a43205a7d64356a6
|
| 3 |
size 14645
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:42a9c19735c8da4979dd9d9844a70622260a91c03b4b752eeed5aefa51d6bbfb
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": 1800,
|
| 3 |
"best_metric": 2.1885855197906494,
|
| 4 |
"best_model_checkpoint": "./outputs/checkpoint-1800",
|
| 5 |
-
"epoch": 2.
|
| 6 |
"eval_steps": 200,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -368,6 +368,42 @@
|
|
| 368 |
"eval_samples_per_second": 81.587,
|
| 369 |
"eval_steps_per_second": 2.575,
|
| 370 |
"step": 2000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 371 |
}
|
| 372 |
],
|
| 373 |
"logging_steps": 50,
|
|
@@ -382,7 +418,7 @@
|
|
| 382 |
"early_stopping_threshold": 0.0
|
| 383 |
},
|
| 384 |
"attributes": {
|
| 385 |
-
"early_stopping_patience_counter":
|
| 386 |
}
|
| 387 |
},
|
| 388 |
"TrainerControl": {
|
|
@@ -396,7 +432,7 @@
|
|
| 396 |
"attributes": {}
|
| 397 |
}
|
| 398 |
},
|
| 399 |
-
"total_flos":
|
| 400 |
"train_batch_size": 32,
|
| 401 |
"trial_name": null,
|
| 402 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": 1800,
|
| 3 |
"best_metric": 2.1885855197906494,
|
| 4 |
"best_model_checkpoint": "./outputs/checkpoint-1800",
|
| 5 |
+
"epoch": 2.3887079261672097,
|
| 6 |
"eval_steps": 200,
|
| 7 |
+
"global_step": 2200,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 368 |
"eval_samples_per_second": 81.587,
|
| 369 |
"eval_steps_per_second": 2.575,
|
| 370 |
"step": 2000
|
| 371 |
+
},
|
| 372 |
+
{
|
| 373 |
+
"epoch": 2.225841476655809,
|
| 374 |
+
"grad_norm": 1.2955018281936646,
|
| 375 |
+
"learning_rate": 3.227632691927414e-05,
|
| 376 |
+
"loss": 2.1308,
|
| 377 |
+
"step": 2050
|
| 378 |
+
},
|
| 379 |
+
{
|
| 380 |
+
"epoch": 2.2801302931596092,
|
| 381 |
+
"grad_norm": 1.0340995788574219,
|
| 382 |
+
"learning_rate": 2.8132192008487768e-05,
|
| 383 |
+
"loss": 2.129,
|
| 384 |
+
"step": 2100
|
| 385 |
+
},
|
| 386 |
+
{
|
| 387 |
+
"epoch": 2.3344191096634095,
|
| 388 |
+
"grad_norm": 1.2585651874542236,
|
| 389 |
+
"learning_rate": 2.4228910751455625e-05,
|
| 390 |
+
"loss": 2.1403,
|
| 391 |
+
"step": 2150
|
| 392 |
+
},
|
| 393 |
+
{
|
| 394 |
+
"epoch": 2.3887079261672097,
|
| 395 |
+
"grad_norm": 1.2523435354232788,
|
| 396 |
+
"learning_rate": 2.0579564381328775e-05,
|
| 397 |
+
"loss": 2.1197,
|
| 398 |
+
"step": 2200
|
| 399 |
+
},
|
| 400 |
+
{
|
| 401 |
+
"epoch": 2.3887079261672097,
|
| 402 |
+
"eval_loss": 2.190831422805786,
|
| 403 |
+
"eval_runtime": 7.3454,
|
| 404 |
+
"eval_samples_per_second": 81.956,
|
| 405 |
+
"eval_steps_per_second": 2.587,
|
| 406 |
+
"step": 2200
|
| 407 |
}
|
| 408 |
],
|
| 409 |
"logging_steps": 50,
|
|
|
|
| 418 |
"early_stopping_threshold": 0.0
|
| 419 |
},
|
| 420 |
"attributes": {
|
| 421 |
+
"early_stopping_patience_counter": 2
|
| 422 |
}
|
| 423 |
},
|
| 424 |
"TrainerControl": {
|
|
|
|
| 432 |
"attributes": {}
|
| 433 |
}
|
| 434 |
},
|
| 435 |
+
"total_flos": 6.162730083190702e+17,
|
| 436 |
"train_batch_size": 32,
|
| 437 |
"trial_name": null,
|
| 438 |
"trial_params": null
|