Training in progress, step 2613, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
|
@@ -732,6 +732,16 @@ You can finetune this model on your own dataset.
|
|
| 732 |
| 0.4298 | 1496 | 1.1907 | 0.9647 | 0.5922 |
|
| 733 |
| 0.4550 | 1584 | 1.1587 | 0.9537 | 0.5585 |
|
| 734 |
| 0.4803 | 1672 | 0.9554 | 0.9304 | 0.5592 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 735 |
|
| 736 |
|
| 737 |
### Framework Versions
|
|
|
|
| 732 |
| 0.4298 | 1496 | 1.1907 | 0.9647 | 0.5922 |
|
| 733 |
| 0.4550 | 1584 | 1.1587 | 0.9537 | 0.5585 |
|
| 734 |
| 0.4803 | 1672 | 0.9554 | 0.9304 | 0.5592 |
|
| 735 |
+
| 0.5056 | 1760 | 0.9837 | 0.9165 | 0.5467 |
|
| 736 |
+
| 0.5309 | 1848 | 0.8857 | 0.8931 | 0.5374 |
|
| 737 |
+
| 0.5562 | 1936 | 0.9305 | 0.8842 | 0.5331 |
|
| 738 |
+
| 0.5814 | 2024 | 0.8061 | 0.8854 | 0.5477 |
|
| 739 |
+
| 0.6067 | 2112 | 0.8286 | 0.8693 | 0.5196 |
|
| 740 |
+
| 0.6320 | 2200 | 0.7854 | 0.8592 | 0.5159 |
|
| 741 |
+
| 0.6573 | 2288 | 0.8374 | 0.8538 | 0.5090 |
|
| 742 |
+
| 0.6826 | 2376 | 0.7678 | 0.8425 | 0.5175 |
|
| 743 |
+
| 0.7078 | 2464 | 0.7064 | 0.8284 | 0.5046 |
|
| 744 |
+
| 0.7331 | 2552 | 0.8849 | 0.8329 | 0.4783 |
|
| 745 |
|
| 746 |
|
| 747 |
### Framework Versions
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1130520122
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aaee732f14b229d04a0b6c6d028c92842350429f670d7910b9ab5d292210c922
|
| 3 |
size 1130520122
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 565251810
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a46ba1df33f0d186a8634e3fd3902123f7ce96112f85627e6a763e3d779be99b
|
| 3 |
size 565251810
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14180
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:51dc264fe435d10d1407e610654f4adbea838b132e0f6c5827047a283ee5ce28
|
| 3 |
size 14180
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:54bc5d3e1ab7114cca6c72d26cc59c590fe581357d9bb65482f0e470a92fd4ae
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 88,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -444,6 +444,236 @@
|
|
| 444 |
"eval_scitail-pairs-pos_samples_per_second": 166.813,
|
| 445 |
"eval_scitail-pairs-pos_steps_per_second": 10.49,
|
| 446 |
"step": 1672
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 447 |
}
|
| 448 |
],
|
| 449 |
"logging_steps": 88,
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.7506463659867854,
|
| 5 |
"eval_steps": 88,
|
| 6 |
+
"global_step": 2613,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 444 |
"eval_scitail-pairs-pos_samples_per_second": 166.813,
|
| 445 |
"eval_scitail-pairs-pos_steps_per_second": 10.49,
|
| 446 |
"step": 1672
|
| 447 |
+
},
|
| 448 |
+
{
|
| 449 |
+
"epoch": 0.5056018385521401,
|
| 450 |
+
"grad_norm": 8.821345329284668,
|
| 451 |
+
"learning_rate": 1.681910776921864e-05,
|
| 452 |
+
"loss": 0.9837,
|
| 453 |
+
"step": 1760
|
| 454 |
+
},
|
| 455 |
+
{
|
| 456 |
+
"epoch": 0.5056018385521401,
|
| 457 |
+
"eval_nli-pairs_loss": 0.9164705276489258,
|
| 458 |
+
"eval_nli-pairs_runtime": 38.0836,
|
| 459 |
+
"eval_nli-pairs_samples_per_second": 178.765,
|
| 460 |
+
"eval_nli-pairs_steps_per_second": 11.186,
|
| 461 |
+
"step": 1760
|
| 462 |
+
},
|
| 463 |
+
{
|
| 464 |
+
"epoch": 0.5056018385521401,
|
| 465 |
+
"eval_scitail-pairs-pos_loss": 0.5467000007629395,
|
| 466 |
+
"eval_scitail-pairs-pos_runtime": 7.7942,
|
| 467 |
+
"eval_scitail-pairs-pos_samples_per_second": 167.304,
|
| 468 |
+
"eval_scitail-pairs-pos_steps_per_second": 10.521,
|
| 469 |
+
"step": 1760
|
| 470 |
+
},
|
| 471 |
+
{
|
| 472 |
+
"epoch": 0.5308819304797472,
|
| 473 |
+
"grad_norm": 9.250692367553711,
|
| 474 |
+
"learning_rate": 1.5906153307778405e-05,
|
| 475 |
+
"loss": 0.8857,
|
| 476 |
+
"step": 1848
|
| 477 |
+
},
|
| 478 |
+
{
|
| 479 |
+
"epoch": 0.5308819304797472,
|
| 480 |
+
"eval_nli-pairs_loss": 0.8931341171264648,
|
| 481 |
+
"eval_nli-pairs_runtime": 38.0639,
|
| 482 |
+
"eval_nli-pairs_samples_per_second": 178.857,
|
| 483 |
+
"eval_nli-pairs_steps_per_second": 11.192,
|
| 484 |
+
"step": 1848
|
| 485 |
+
},
|
| 486 |
+
{
|
| 487 |
+
"epoch": 0.5308819304797472,
|
| 488 |
+
"eval_scitail-pairs-pos_loss": 0.5374401807785034,
|
| 489 |
+
"eval_scitail-pairs-pos_runtime": 7.8097,
|
| 490 |
+
"eval_scitail-pairs-pos_samples_per_second": 166.972,
|
| 491 |
+
"eval_scitail-pairs-pos_steps_per_second": 10.5,
|
| 492 |
+
"step": 1848
|
| 493 |
+
},
|
| 494 |
+
{
|
| 495 |
+
"epoch": 0.5561620224073542,
|
| 496 |
+
"grad_norm": 5.3266706466674805,
|
| 497 |
+
"learning_rate": 1.491028940034468e-05,
|
| 498 |
+
"loss": 0.9305,
|
| 499 |
+
"step": 1936
|
| 500 |
+
},
|
| 501 |
+
{
|
| 502 |
+
"epoch": 0.5561620224073542,
|
| 503 |
+
"eval_nli-pairs_loss": 0.8841533064842224,
|
| 504 |
+
"eval_nli-pairs_runtime": 38.1566,
|
| 505 |
+
"eval_nli-pairs_samples_per_second": 178.423,
|
| 506 |
+
"eval_nli-pairs_steps_per_second": 11.165,
|
| 507 |
+
"step": 1936
|
| 508 |
+
},
|
| 509 |
+
{
|
| 510 |
+
"epoch": 0.5561620224073542,
|
| 511 |
+
"eval_scitail-pairs-pos_loss": 0.5330824851989746,
|
| 512 |
+
"eval_scitail-pairs-pos_runtime": 7.8415,
|
| 513 |
+
"eval_scitail-pairs-pos_samples_per_second": 166.294,
|
| 514 |
+
"eval_scitail-pairs-pos_steps_per_second": 10.457,
|
| 515 |
+
"step": 1936
|
| 516 |
+
},
|
| 517 |
+
{
|
| 518 |
+
"epoch": 0.5814421143349612,
|
| 519 |
+
"grad_norm": 6.629028797149658,
|
| 520 |
+
"learning_rate": 1.3845495793217223e-05,
|
| 521 |
+
"loss": 0.8061,
|
| 522 |
+
"step": 2024
|
| 523 |
+
},
|
| 524 |
+
{
|
| 525 |
+
"epoch": 0.5814421143349612,
|
| 526 |
+
"eval_nli-pairs_loss": 0.8853806257247925,
|
| 527 |
+
"eval_nli-pairs_runtime": 38.172,
|
| 528 |
+
"eval_nli-pairs_samples_per_second": 178.351,
|
| 529 |
+
"eval_nli-pairs_steps_per_second": 11.16,
|
| 530 |
+
"step": 2024
|
| 531 |
+
},
|
| 532 |
+
{
|
| 533 |
+
"epoch": 0.5814421143349612,
|
| 534 |
+
"eval_scitail-pairs-pos_loss": 0.5477445125579834,
|
| 535 |
+
"eval_scitail-pairs-pos_runtime": 7.8333,
|
| 536 |
+
"eval_scitail-pairs-pos_samples_per_second": 166.469,
|
| 537 |
+
"eval_scitail-pairs-pos_steps_per_second": 10.468,
|
| 538 |
+
"step": 2024
|
| 539 |
+
},
|
| 540 |
+
{
|
| 541 |
+
"epoch": 0.6067222062625682,
|
| 542 |
+
"grad_norm": 4.16071081161499,
|
| 543 |
+
"learning_rate": 1.2726719854583736e-05,
|
| 544 |
+
"loss": 0.8286,
|
| 545 |
+
"step": 2112
|
| 546 |
+
},
|
| 547 |
+
{
|
| 548 |
+
"epoch": 0.6067222062625682,
|
| 549 |
+
"eval_nli-pairs_loss": 0.8693087697029114,
|
| 550 |
+
"eval_nli-pairs_runtime": 38.1088,
|
| 551 |
+
"eval_nli-pairs_samples_per_second": 178.646,
|
| 552 |
+
"eval_nli-pairs_steps_per_second": 11.179,
|
| 553 |
+
"step": 2112
|
| 554 |
+
},
|
| 555 |
+
{
|
| 556 |
+
"epoch": 0.6067222062625682,
|
| 557 |
+
"eval_scitail-pairs-pos_loss": 0.5196370482444763,
|
| 558 |
+
"eval_scitail-pairs-pos_runtime": 7.8534,
|
| 559 |
+
"eval_scitail-pairs-pos_samples_per_second": 166.042,
|
| 560 |
+
"eval_scitail-pairs-pos_steps_per_second": 10.441,
|
| 561 |
+
"step": 2112
|
| 562 |
+
},
|
| 563 |
+
{
|
| 564 |
+
"epoch": 0.6320022981901753,
|
| 565 |
+
"grad_norm": 2.518064498901367,
|
| 566 |
+
"learning_rate": 1.1569666746235527e-05,
|
| 567 |
+
"loss": 0.7854,
|
| 568 |
+
"step": 2200
|
| 569 |
+
},
|
| 570 |
+
{
|
| 571 |
+
"epoch": 0.6320022981901753,
|
| 572 |
+
"eval_nli-pairs_loss": 0.859151303768158,
|
| 573 |
+
"eval_nli-pairs_runtime": 38.0838,
|
| 574 |
+
"eval_nli-pairs_samples_per_second": 178.764,
|
| 575 |
+
"eval_nli-pairs_steps_per_second": 11.186,
|
| 576 |
+
"step": 2200
|
| 577 |
+
},
|
| 578 |
+
{
|
| 579 |
+
"epoch": 0.6320022981901753,
|
| 580 |
+
"eval_scitail-pairs-pos_loss": 0.5159358978271484,
|
| 581 |
+
"eval_scitail-pairs-pos_runtime": 7.7611,
|
| 582 |
+
"eval_scitail-pairs-pos_samples_per_second": 168.018,
|
| 583 |
+
"eval_scitail-pairs-pos_steps_per_second": 10.566,
|
| 584 |
+
"step": 2200
|
| 585 |
+
},
|
| 586 |
+
{
|
| 587 |
+
"epoch": 0.6572823901177822,
|
| 588 |
+
"grad_norm": 4.033371925354004,
|
| 589 |
+
"learning_rate": 1.0390578957522117e-05,
|
| 590 |
+
"loss": 0.8374,
|
| 591 |
+
"step": 2288
|
| 592 |
+
},
|
| 593 |
+
{
|
| 594 |
+
"epoch": 0.6572823901177822,
|
| 595 |
+
"eval_nli-pairs_loss": 0.8537901043891907,
|
| 596 |
+
"eval_nli-pairs_runtime": 38.0742,
|
| 597 |
+
"eval_nli-pairs_samples_per_second": 178.809,
|
| 598 |
+
"eval_nli-pairs_steps_per_second": 11.189,
|
| 599 |
+
"step": 2288
|
| 600 |
+
},
|
| 601 |
+
{
|
| 602 |
+
"epoch": 0.6572823901177822,
|
| 603 |
+
"eval_scitail-pairs-pos_loss": 0.509048581123352,
|
| 604 |
+
"eval_scitail-pairs-pos_runtime": 7.7812,
|
| 605 |
+
"eval_scitail-pairs-pos_samples_per_second": 167.582,
|
| 606 |
+
"eval_scitail-pairs-pos_steps_per_second": 10.538,
|
| 607 |
+
"step": 2288
|
| 608 |
+
},
|
| 609 |
+
{
|
| 610 |
+
"epoch": 0.6825624820453893,
|
| 611 |
+
"grad_norm": 6.1265363693237305,
|
| 612 |
+
"learning_rate": 9.206008296404724e-06,
|
| 613 |
+
"loss": 0.7678,
|
| 614 |
+
"step": 2376
|
| 615 |
+
},
|
| 616 |
+
{
|
| 617 |
+
"epoch": 0.6825624820453893,
|
| 618 |
+
"eval_nli-pairs_loss": 0.8425480723381042,
|
| 619 |
+
"eval_nli-pairs_runtime": 38.0596,
|
| 620 |
+
"eval_nli-pairs_samples_per_second": 178.877,
|
| 621 |
+
"eval_nli-pairs_steps_per_second": 11.193,
|
| 622 |
+
"step": 2376
|
| 623 |
+
},
|
| 624 |
+
{
|
| 625 |
+
"epoch": 0.6825624820453893,
|
| 626 |
+
"eval_scitail-pairs-pos_loss": 0.5174906253814697,
|
| 627 |
+
"eval_scitail-pairs-pos_runtime": 7.7617,
|
| 628 |
+
"eval_scitail-pairs-pos_samples_per_second": 168.003,
|
| 629 |
+
"eval_scitail-pairs-pos_steps_per_second": 10.565,
|
| 630 |
+
"step": 2376
|
| 631 |
+
},
|
| 632 |
+
{
|
| 633 |
+
"epoch": 0.7078425739729962,
|
| 634 |
+
"grad_norm": 3.0078606605529785,
|
| 635 |
+
"learning_rate": 8.032583538354534e-06,
|
| 636 |
+
"loss": 0.7064,
|
| 637 |
+
"step": 2464
|
| 638 |
+
},
|
| 639 |
+
{
|
| 640 |
+
"epoch": 0.7078425739729962,
|
| 641 |
+
"eval_nli-pairs_loss": 0.8283973336219788,
|
| 642 |
+
"eval_nli-pairs_runtime": 38.2909,
|
| 643 |
+
"eval_nli-pairs_samples_per_second": 177.797,
|
| 644 |
+
"eval_nli-pairs_steps_per_second": 11.125,
|
| 645 |
+
"step": 2464
|
| 646 |
+
},
|
| 647 |
+
{
|
| 648 |
+
"epoch": 0.7078425739729962,
|
| 649 |
+
"eval_scitail-pairs-pos_loss": 0.5045931935310364,
|
| 650 |
+
"eval_scitail-pairs-pos_runtime": 7.8174,
|
| 651 |
+
"eval_scitail-pairs-pos_samples_per_second": 166.806,
|
| 652 |
+
"eval_scitail-pairs-pos_steps_per_second": 10.489,
|
| 653 |
+
"step": 2464
|
| 654 |
+
},
|
| 655 |
+
{
|
| 656 |
+
"epoch": 0.7331226659006033,
|
| 657 |
+
"grad_norm": 8.649880409240723,
|
| 658 |
+
"learning_rate": 6.8867769947957765e-06,
|
| 659 |
+
"loss": 0.8849,
|
| 660 |
+
"step": 2552
|
| 661 |
+
},
|
| 662 |
+
{
|
| 663 |
+
"epoch": 0.7331226659006033,
|
| 664 |
+
"eval_nli-pairs_loss": 0.8328748941421509,
|
| 665 |
+
"eval_nli-pairs_runtime": 38.2288,
|
| 666 |
+
"eval_nli-pairs_samples_per_second": 178.086,
|
| 667 |
+
"eval_nli-pairs_steps_per_second": 11.143,
|
| 668 |
+
"step": 2552
|
| 669 |
+
},
|
| 670 |
+
{
|
| 671 |
+
"epoch": 0.7331226659006033,
|
| 672 |
+
"eval_scitail-pairs-pos_loss": 0.478294312953949,
|
| 673 |
+
"eval_scitail-pairs-pos_runtime": 7.8918,
|
| 674 |
+
"eval_scitail-pairs-pos_samples_per_second": 165.235,
|
| 675 |
+
"eval_scitail-pairs-pos_steps_per_second": 10.391,
|
| 676 |
+
"step": 2552
|
| 677 |
}
|
| 678 |
],
|
| 679 |
"logging_steps": 88,
|