Training in progress, step 380000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +2 -2
- last-checkpoint/rng_state_2.pth +2 -2
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +2 -2
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 202194449
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:241d9085c124b3eba48c530116f68f59d85b2df3443facaec6ea11dc49d55a64
|
| 3 |
size 202194449
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 102501541
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8d4bc41779e2d6304b9419d2772739a130d22e86fd6c9f3afbd2e0c3ee5419e4
|
| 3 |
size 102501541
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8e37ec2ad84f7ba85cc772e99cb8545a4cbfaa7381a41ddd4a3aa91e0ce61e68
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:85519fa51d75acbbb3048d7882484da3d5005c19c2db1456cbcb7c7e288fefdc
|
| 3 |
+
size 14439
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4bb0afece5c8069cc11f1d74f2cbe0cc7ca51acd29eabfe984c5004653b66638
|
| 3 |
+
size 14439
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9f7a8a69ea84c76c1ba62360211ec1aee5a43eb5be6b28be19dd35082fc0f872
|
| 3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3c9dfd822ea158ad304a36506ae43c2b1a7adf656df5c9032384a0d9d826989e
|
| 3 |
+
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bd61e923aa53357f018bb8f972acd03e5e22cec973afb7a44242ee37ecdd9eac
|
| 3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:358496c77d19a4e645872ac224e8695ee5602a8fd7b41e2a32b9cf0a419156c8
|
| 3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a344adc4e56950e93eda7f993a6791867b71bbebf0dede9e3e7fb6302a4626eb
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7c2047b5f47fc3de929bb0738f7fbdd248300ab063f6fd4eddcabc29f5482852
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 11.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -7406,11 +7406,211 @@
|
|
| 7406 |
"eval_samples_per_second": 1964.929,
|
| 7407 |
"eval_steps_per_second": 31.439,
|
| 7408 |
"step": 370000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7409 |
}
|
| 7410 |
],
|
| 7411 |
"max_steps": 500000,
|
| 7412 |
"num_train_epochs": 16,
|
| 7413 |
-
"total_flos": 1.
|
| 7414 |
"trial_name": null,
|
| 7415 |
"trial_params": null
|
| 7416 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 11.642513557400656,
|
| 5 |
+
"global_step": 380000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 7406 |
"eval_samples_per_second": 1964.929,
|
| 7407 |
"eval_steps_per_second": 31.439,
|
| 7408 |
"step": 370000
|
| 7409 |
+
},
|
| 7410 |
+
{
|
| 7411 |
+
"epoch": 11.35,
|
| 7412 |
+
"learning_rate": 6.0012253629189544e-05,
|
| 7413 |
+
"loss": 0.3214,
|
| 7414 |
+
"step": 370500
|
| 7415 |
+
},
|
| 7416 |
+
{
|
| 7417 |
+
"epoch": 11.37,
|
| 7418 |
+
"learning_rate": 5.965048145015944e-05,
|
| 7419 |
+
"loss": 0.3212,
|
| 7420 |
+
"step": 371000
|
| 7421 |
+
},
|
| 7422 |
+
{
|
| 7423 |
+
"epoch": 11.37,
|
| 7424 |
+
"eval_loss": 0.7814466953277588,
|
| 7425 |
+
"eval_runtime": 0.5088,
|
| 7426 |
+
"eval_samples_per_second": 1965.383,
|
| 7427 |
+
"eval_steps_per_second": 31.446,
|
| 7428 |
+
"step": 371000
|
| 7429 |
+
},
|
| 7430 |
+
{
|
| 7431 |
+
"epoch": 11.38,
|
| 7432 |
+
"learning_rate": 5.928975199818785e-05,
|
| 7433 |
+
"loss": 0.3215,
|
| 7434 |
+
"step": 371500
|
| 7435 |
+
},
|
| 7436 |
+
{
|
| 7437 |
+
"epoch": 11.4,
|
| 7438 |
+
"learning_rate": 5.893006921815428e-05,
|
| 7439 |
+
"loss": 0.3213,
|
| 7440 |
+
"step": 372000
|
| 7441 |
+
},
|
| 7442 |
+
{
|
| 7443 |
+
"epoch": 11.4,
|
| 7444 |
+
"eval_loss": 0.7858847975730896,
|
| 7445 |
+
"eval_runtime": 0.5207,
|
| 7446 |
+
"eval_samples_per_second": 1920.357,
|
| 7447 |
+
"eval_steps_per_second": 30.726,
|
| 7448 |
+
"step": 372000
|
| 7449 |
+
},
|
| 7450 |
+
{
|
| 7451 |
+
"epoch": 11.41,
|
| 7452 |
+
"learning_rate": 5.857143704349198e-05,
|
| 7453 |
+
"loss": 0.321,
|
| 7454 |
+
"step": 372500
|
| 7455 |
+
},
|
| 7456 |
+
{
|
| 7457 |
+
"epoch": 11.43,
|
| 7458 |
+
"learning_rate": 5.8213859396144986e-05,
|
| 7459 |
+
"loss": 0.3213,
|
| 7460 |
+
"step": 373000
|
| 7461 |
+
},
|
| 7462 |
+
{
|
| 7463 |
+
"epoch": 11.43,
|
| 7464 |
+
"eval_loss": 0.780572772026062,
|
| 7465 |
+
"eval_runtime": 0.523,
|
| 7466 |
+
"eval_samples_per_second": 1911.94,
|
| 7467 |
+
"eval_steps_per_second": 30.591,
|
| 7468 |
+
"step": 373000
|
| 7469 |
+
},
|
| 7470 |
+
{
|
| 7471 |
+
"epoch": 11.44,
|
| 7472 |
+
"learning_rate": 5.785734018652507e-05,
|
| 7473 |
+
"loss": 0.3212,
|
| 7474 |
+
"step": 373500
|
| 7475 |
+
},
|
| 7476 |
+
{
|
| 7477 |
+
"epoch": 11.46,
|
| 7478 |
+
"learning_rate": 5.750188331346927e-05,
|
| 7479 |
+
"loss": 0.3211,
|
| 7480 |
+
"step": 374000
|
| 7481 |
+
},
|
| 7482 |
+
{
|
| 7483 |
+
"epoch": 11.46,
|
| 7484 |
+
"eval_loss": 0.7766540050506592,
|
| 7485 |
+
"eval_runtime": 0.5243,
|
| 7486 |
+
"eval_samples_per_second": 1907.256,
|
| 7487 |
+
"eval_steps_per_second": 30.516,
|
| 7488 |
+
"step": 374000
|
| 7489 |
+
},
|
| 7490 |
+
{
|
| 7491 |
+
"epoch": 11.47,
|
| 7492 |
+
"learning_rate": 5.714749266419695e-05,
|
| 7493 |
+
"loss": 0.3207,
|
| 7494 |
+
"step": 374500
|
| 7495 |
+
},
|
| 7496 |
+
{
|
| 7497 |
+
"epoch": 11.49,
|
| 7498 |
+
"learning_rate": 5.6794172114267566e-05,
|
| 7499 |
+
"loss": 0.3208,
|
| 7500 |
+
"step": 375000
|
| 7501 |
+
},
|
| 7502 |
+
{
|
| 7503 |
+
"epoch": 11.49,
|
| 7504 |
+
"eval_loss": 0.7823048233985901,
|
| 7505 |
+
"eval_runtime": 0.5063,
|
| 7506 |
+
"eval_samples_per_second": 1975.154,
|
| 7507 |
+
"eval_steps_per_second": 31.602,
|
| 7508 |
+
"step": 375000
|
| 7509 |
+
},
|
| 7510 |
+
{
|
| 7511 |
+
"epoch": 11.5,
|
| 7512 |
+
"learning_rate": 5.6441925527537914e-05,
|
| 7513 |
+
"loss": 0.3207,
|
| 7514 |
+
"step": 375500
|
| 7515 |
+
},
|
| 7516 |
+
{
|
| 7517 |
+
"epoch": 11.52,
|
| 7518 |
+
"learning_rate": 5.60907567561203e-05,
|
| 7519 |
+
"loss": 0.3202,
|
| 7520 |
+
"step": 376000
|
| 7521 |
+
},
|
| 7522 |
+
{
|
| 7523 |
+
"epoch": 11.52,
|
| 7524 |
+
"eval_loss": 0.7723506689071655,
|
| 7525 |
+
"eval_runtime": 0.5144,
|
| 7526 |
+
"eval_samples_per_second": 1944.173,
|
| 7527 |
+
"eval_steps_per_second": 31.107,
|
| 7528 |
+
"step": 376000
|
| 7529 |
+
},
|
| 7530 |
+
{
|
| 7531 |
+
"epoch": 11.54,
|
| 7532 |
+
"learning_rate": 5.574066964034012e-05,
|
| 7533 |
+
"loss": 0.3207,
|
| 7534 |
+
"step": 376500
|
| 7535 |
+
},
|
| 7536 |
+
{
|
| 7537 |
+
"epoch": 11.55,
|
| 7538 |
+
"learning_rate": 5.539166800869402e-05,
|
| 7539 |
+
"loss": 0.3208,
|
| 7540 |
+
"step": 377000
|
| 7541 |
+
},
|
| 7542 |
+
{
|
| 7543 |
+
"epoch": 11.55,
|
| 7544 |
+
"eval_loss": 0.7742220163345337,
|
| 7545 |
+
"eval_runtime": 0.4989,
|
| 7546 |
+
"eval_samples_per_second": 2004.565,
|
| 7547 |
+
"eval_steps_per_second": 32.073,
|
| 7548 |
+
"step": 377000
|
| 7549 |
+
},
|
| 7550 |
+
{
|
| 7551 |
+
"epoch": 11.57,
|
| 7552 |
+
"learning_rate": 5.5043755677807955e-05,
|
| 7553 |
+
"loss": 0.3206,
|
| 7554 |
+
"step": 377500
|
| 7555 |
+
},
|
| 7556 |
+
{
|
| 7557 |
+
"epoch": 11.58,
|
| 7558 |
+
"learning_rate": 5.4696936452395344e-05,
|
| 7559 |
+
"loss": 0.3205,
|
| 7560 |
+
"step": 378000
|
| 7561 |
+
},
|
| 7562 |
+
{
|
| 7563 |
+
"epoch": 11.58,
|
| 7564 |
+
"eval_loss": 0.7778945565223694,
|
| 7565 |
+
"eval_runtime": 0.5033,
|
| 7566 |
+
"eval_samples_per_second": 1986.827,
|
| 7567 |
+
"eval_steps_per_second": 31.789,
|
| 7568 |
+
"step": 378000
|
| 7569 |
+
},
|
| 7570 |
+
{
|
| 7571 |
+
"epoch": 11.6,
|
| 7572 |
+
"learning_rate": 5.435121412521576e-05,
|
| 7573 |
+
"loss": 0.3207,
|
| 7574 |
+
"step": 378500
|
| 7575 |
+
},
|
| 7576 |
+
{
|
| 7577 |
+
"epoch": 11.61,
|
| 7578 |
+
"learning_rate": 5.400659247703307e-05,
|
| 7579 |
+
"loss": 0.3204,
|
| 7580 |
+
"step": 379000
|
| 7581 |
+
},
|
| 7582 |
+
{
|
| 7583 |
+
"epoch": 11.61,
|
| 7584 |
+
"eval_loss": 0.7782105803489685,
|
| 7585 |
+
"eval_runtime": 0.507,
|
| 7586 |
+
"eval_samples_per_second": 1972.197,
|
| 7587 |
+
"eval_steps_per_second": 31.555,
|
| 7588 |
+
"step": 379000
|
| 7589 |
+
},
|
| 7590 |
+
{
|
| 7591 |
+
"epoch": 11.63,
|
| 7592 |
+
"learning_rate": 5.36630752765745e-05,
|
| 7593 |
+
"loss": 0.3202,
|
| 7594 |
+
"step": 379500
|
| 7595 |
+
},
|
| 7596 |
+
{
|
| 7597 |
+
"epoch": 11.64,
|
| 7598 |
+
"learning_rate": 5.3320666280489146e-05,
|
| 7599 |
+
"loss": 0.3203,
|
| 7600 |
+
"step": 380000
|
| 7601 |
+
},
|
| 7602 |
+
{
|
| 7603 |
+
"epoch": 11.64,
|
| 7604 |
+
"eval_loss": 0.7813093662261963,
|
| 7605 |
+
"eval_runtime": 0.5119,
|
| 7606 |
+
"eval_samples_per_second": 1953.621,
|
| 7607 |
+
"eval_steps_per_second": 31.258,
|
| 7608 |
+
"step": 380000
|
| 7609 |
}
|
| 7610 |
],
|
| 7611 |
"max_steps": 500000,
|
| 7612 |
"num_train_epochs": 16,
|
| 7613 |
+
"total_flos": 1.2140423405959266e+22,
|
| 7614 |
"trial_name": null,
|
| 7615 |
"trial_params": null
|
| 7616 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 102501541
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8d4bc41779e2d6304b9419d2772739a130d22e86fd6c9f3afbd2e0c3ee5419e4
|
| 3 |
size 102501541
|