Training in progress, epoch 0, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step5450/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5450/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5450/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5450/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5450/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +160 -4
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1037269336
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7f79687bb9f5f0706366f2c8be19c15c67471263d18f90d20b5060477db9fc88
|
| 3 |
size 1037269336
|
last-checkpoint/global_step5450/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f78bf83e85325413df0f2bbacc777613cf49c82d2596e1692688e54d70978f84
|
| 3 |
+
size 781993445
|
last-checkpoint/global_step5450/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7163f01fcaee0f145ef1c75eff4454c619bcde5defdbee79ee575da430be5511
|
| 3 |
+
size 781993509
|
last-checkpoint/global_step5450/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:48a871b3a94e17f81203820fec74d78f54c3ec465668d40c6c5bdb43c82c175d
|
| 3 |
+
size 781993509
|
last-checkpoint/global_step5450/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9b6c79cb7ac7b4c8bd9401c92c69b89682dbdaf4e12f4225e20205f83e52d77c
|
| 3 |
+
size 781993509
|
last-checkpoint/global_step5450/mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:37adf013650b50908706c5990607180032ad85ba97a40654a79dfd22b7b740e5
|
| 3 |
+
size 2610290277
|
last-checkpoint/latest
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
global_step5450
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:09517ccbebacf6bd023c1ab9d33afd5ec868b9be2770425bd6ebefa3839d5f4d
|
| 3 |
size 15429
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1d62fb500bd7f639c86a4805d99914de20d8c185a99a488bb6ea36449fa573a0
|
| 3 |
size 15429
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:763a3f897c1e33a97ed5b1f4dd7ab1bdca39ada5f60f258f0e9cd8f218878aaa
|
| 3 |
size 15429
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:074a5f66e9ddd88b37d69172a271f48d50878d6d7b7fdbdb1735f35f2e0a2b15
|
| 3 |
size 15429
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1401
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9aab12b223f7247afaaf46de482c72204945729f45b93a867c6ad025ed23f245
|
| 3 |
size 1401
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
-
"best_metric": 1.
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 50,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -8354,6 +8354,162 @@
|
|
| 8354 |
"eval_samples_per_second": 172.536,
|
| 8355 |
"eval_steps_per_second": 10.819,
|
| 8356 |
"step": 5350
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8357 |
}
|
| 8358 |
],
|
| 8359 |
"logging_steps": 5,
|
|
@@ -8382,7 +8538,7 @@
|
|
| 8382 |
"attributes": {}
|
| 8383 |
}
|
| 8384 |
},
|
| 8385 |
-
"total_flos": 1.
|
| 8386 |
"train_batch_size": 4,
|
| 8387 |
"trial_name": null,
|
| 8388 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
+
"best_metric": 1.8068690299987793,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.7922663177787469,
|
| 6 |
"eval_steps": 50,
|
| 7 |
+
"global_step": 5450,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 8354 |
"eval_samples_per_second": 172.536,
|
| 8355 |
"eval_steps_per_second": 10.819,
|
| 8356 |
"step": 5350
|
| 8357 |
+
},
|
| 8358 |
+
{
|
| 8359 |
+
"epoch": 0.7784561709550807,
|
| 8360 |
+
"grad_norm": 2.200019598007202,
|
| 8361 |
+
"learning_rate": 4.5272915415458025e-05,
|
| 8362 |
+
"loss": 1.8298,
|
| 8363 |
+
"step": 5355
|
| 8364 |
+
},
|
| 8365 |
+
{
|
| 8366 |
+
"epoch": 0.7791830207879052,
|
| 8367 |
+
"grad_norm": 2.1728880405426025,
|
| 8368 |
+
"learning_rate": 4.5193801886732545e-05,
|
| 8369 |
+
"loss": 1.8708,
|
| 8370 |
+
"step": 5360
|
| 8371 |
+
},
|
| 8372 |
+
{
|
| 8373 |
+
"epoch": 0.7799098706207298,
|
| 8374 |
+
"grad_norm": 2.5079801082611084,
|
| 8375 |
+
"learning_rate": 4.511470070524542e-05,
|
| 8376 |
+
"loss": 2.0338,
|
| 8377 |
+
"step": 5365
|
| 8378 |
+
},
|
| 8379 |
+
{
|
| 8380 |
+
"epoch": 0.7806367204535543,
|
| 8381 |
+
"grad_norm": 2.2738819122314453,
|
| 8382 |
+
"learning_rate": 4.5035612070133724e-05,
|
| 8383 |
+
"loss": 2.0264,
|
| 8384 |
+
"step": 5370
|
| 8385 |
+
},
|
| 8386 |
+
{
|
| 8387 |
+
"epoch": 0.7813635702863788,
|
| 8388 |
+
"grad_norm": 2.4879889488220215,
|
| 8389 |
+
"learning_rate": 4.495653618050305e-05,
|
| 8390 |
+
"loss": 2.0748,
|
| 8391 |
+
"step": 5375
|
| 8392 |
+
},
|
| 8393 |
+
{
|
| 8394 |
+
"epoch": 0.7820904201192034,
|
| 8395 |
+
"grad_norm": 2.4474239349365234,
|
| 8396 |
+
"learning_rate": 4.487747323542682e-05,
|
| 8397 |
+
"loss": 2.1098,
|
| 8398 |
+
"step": 5380
|
| 8399 |
+
},
|
| 8400 |
+
{
|
| 8401 |
+
"epoch": 0.7828172699520279,
|
| 8402 |
+
"grad_norm": 2.3970248699188232,
|
| 8403 |
+
"learning_rate": 4.4798423433945934e-05,
|
| 8404 |
+
"loss": 1.9963,
|
| 8405 |
+
"step": 5385
|
| 8406 |
+
},
|
| 8407 |
+
{
|
| 8408 |
+
"epoch": 0.7835441197848525,
|
| 8409 |
+
"grad_norm": 2.433213472366333,
|
| 8410 |
+
"learning_rate": 4.4719386975068136e-05,
|
| 8411 |
+
"loss": 1.8989,
|
| 8412 |
+
"step": 5390
|
| 8413 |
+
},
|
| 8414 |
+
{
|
| 8415 |
+
"epoch": 0.784270969617677,
|
| 8416 |
+
"grad_norm": 2.2355504035949707,
|
| 8417 |
+
"learning_rate": 4.464036405776766e-05,
|
| 8418 |
+
"loss": 1.9435,
|
| 8419 |
+
"step": 5395
|
| 8420 |
+
},
|
| 8421 |
+
{
|
| 8422 |
+
"epoch": 0.7849978194505015,
|
| 8423 |
+
"grad_norm": 2.4685556888580322,
|
| 8424 |
+
"learning_rate": 4.45613548809846e-05,
|
| 8425 |
+
"loss": 2.0392,
|
| 8426 |
+
"step": 5400
|
| 8427 |
+
},
|
| 8428 |
+
{
|
| 8429 |
+
"epoch": 0.7849978194505015,
|
| 8430 |
+
"eval_loss": 1.8167221546173096,
|
| 8431 |
+
"eval_runtime": 21.1854,
|
| 8432 |
+
"eval_samples_per_second": 155.815,
|
| 8433 |
+
"eval_steps_per_second": 9.771,
|
| 8434 |
+
"step": 5400
|
| 8435 |
+
},
|
| 8436 |
+
{
|
| 8437 |
+
"epoch": 0.7857246692833261,
|
| 8438 |
+
"grad_norm": 2.497147560119629,
|
| 8439 |
+
"learning_rate": 4.4482359643624416e-05,
|
| 8440 |
+
"loss": 2.0125,
|
| 8441 |
+
"step": 5405
|
| 8442 |
+
},
|
| 8443 |
+
{
|
| 8444 |
+
"epoch": 0.7864515191161506,
|
| 8445 |
+
"grad_norm": 2.2153327465057373,
|
| 8446 |
+
"learning_rate": 4.440337854455758e-05,
|
| 8447 |
+
"loss": 1.9912,
|
| 8448 |
+
"step": 5410
|
| 8449 |
+
},
|
| 8450 |
+
{
|
| 8451 |
+
"epoch": 0.7871783689489752,
|
| 8452 |
+
"grad_norm": 2.377063751220703,
|
| 8453 |
+
"learning_rate": 4.4324411782618886e-05,
|
| 8454 |
+
"loss": 2.0526,
|
| 8455 |
+
"step": 5415
|
| 8456 |
+
},
|
| 8457 |
+
{
|
| 8458 |
+
"epoch": 0.7879052187817996,
|
| 8459 |
+
"grad_norm": 2.868448257446289,
|
| 8460 |
+
"learning_rate": 4.424545955660708e-05,
|
| 8461 |
+
"loss": 2.0737,
|
| 8462 |
+
"step": 5420
|
| 8463 |
+
},
|
| 8464 |
+
{
|
| 8465 |
+
"epoch": 0.7886320686146242,
|
| 8466 |
+
"grad_norm": 2.2389824390411377,
|
| 8467 |
+
"learning_rate": 4.416652206528426e-05,
|
| 8468 |
+
"loss": 2.0863,
|
| 8469 |
+
"step": 5425
|
| 8470 |
+
},
|
| 8471 |
+
{
|
| 8472 |
+
"epoch": 0.7893589184474488,
|
| 8473 |
+
"grad_norm": 2.287515878677368,
|
| 8474 |
+
"learning_rate": 4.4087599507375526e-05,
|
| 8475 |
+
"loss": 2.1026,
|
| 8476 |
+
"step": 5430
|
| 8477 |
+
},
|
| 8478 |
+
{
|
| 8479 |
+
"epoch": 0.7900857682802733,
|
| 8480 |
+
"grad_norm": 2.3255653381347656,
|
| 8481 |
+
"learning_rate": 4.4008692081568266e-05,
|
| 8482 |
+
"loss": 1.9035,
|
| 8483 |
+
"step": 5435
|
| 8484 |
+
},
|
| 8485 |
+
{
|
| 8486 |
+
"epoch": 0.7908126181130979,
|
| 8487 |
+
"grad_norm": 2.2820541858673096,
|
| 8488 |
+
"learning_rate": 4.3929799986511875e-05,
|
| 8489 |
+
"loss": 2.0517,
|
| 8490 |
+
"step": 5440
|
| 8491 |
+
},
|
| 8492 |
+
{
|
| 8493 |
+
"epoch": 0.7915394679459223,
|
| 8494 |
+
"grad_norm": 2.6271772384643555,
|
| 8495 |
+
"learning_rate": 4.3850923420817075e-05,
|
| 8496 |
+
"loss": 2.0402,
|
| 8497 |
+
"step": 5445
|
| 8498 |
+
},
|
| 8499 |
+
{
|
| 8500 |
+
"epoch": 0.7922663177787469,
|
| 8501 |
+
"grad_norm": 2.2059221267700195,
|
| 8502 |
+
"learning_rate": 4.3772062583055546e-05,
|
| 8503 |
+
"loss": 2.1433,
|
| 8504 |
+
"step": 5450
|
| 8505 |
+
},
|
| 8506 |
+
{
|
| 8507 |
+
"epoch": 0.7922663177787469,
|
| 8508 |
+
"eval_loss": 1.8068690299987793,
|
| 8509 |
+
"eval_runtime": 19.1691,
|
| 8510 |
+
"eval_samples_per_second": 172.204,
|
| 8511 |
+
"eval_steps_per_second": 10.799,
|
| 8512 |
+
"step": 5450
|
| 8513 |
}
|
| 8514 |
],
|
| 8515 |
"logging_steps": 5,
|
|
|
|
| 8538 |
"attributes": {}
|
| 8539 |
}
|
| 8540 |
},
|
| 8541 |
+
"total_flos": 1.4208114924453888e+18,
|
| 8542 |
"train_batch_size": 4,
|
| 8543 |
"trial_name": null,
|
| 8544 |
"trial_params": null
|