Upload folder using huggingface_hub
Browse files- adapter_model.safetensors +1 -1
- optimizer.pt +1 -1
- rng_state.pth +1 -1
- scheduler.pt +1 -1
- trainer_state.json +3 -203
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 161515608
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1238e11a7bd83abb0038d7f1ee20d6d90f9c39b3e70e08a93260b11901cee5c5
|
| 3 |
size 161515608
|
optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 323181259
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0230fe0e059307ec2503aabf08f5e2bde7daf4b09ee960fcb69b3dfba125cec1
|
| 3 |
size 323181259
|
rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7063580a565cb4ab0c1d36b25d817a35a16d1f21f4a993a9f25cdba6efadcb9d
|
| 3 |
size 14645
|
scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0cc1343ebe01037162a057bcccefc9f328f82750a217d5974a02a6ad6a4bc5ce
|
| 3 |
size 1465
|
trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -558,206 +558,6 @@
|
|
| 558 |
"mean_token_accuracy": 0.765391580760479,
|
| 559 |
"num_tokens": 204197.0,
|
| 560 |
"step": 55
|
| 561 |
-
},
|
| 562 |
-
{
|
| 563 |
-
"entropy": 1.411361187696457,
|
| 564 |
-
"epoch": 3.116788321167883,
|
| 565 |
-
"grad_norm": 4.21875,
|
| 566 |
-
"learning_rate": 1.3432073050985201e-05,
|
| 567 |
-
"loss": 0.7665,
|
| 568 |
-
"mean_token_accuracy": 0.7553833983838558,
|
| 569 |
-
"num_tokens": 207610.0,
|
| 570 |
-
"step": 56
|
| 571 |
-
},
|
| 572 |
-
{
|
| 573 |
-
"entropy": 1.3223325684666634,
|
| 574 |
-
"epoch": 3.1751824817518246,
|
| 575 |
-
"grad_norm": 3.71875,
|
| 576 |
-
"learning_rate": 1.2808754571563827e-05,
|
| 577 |
-
"loss": 0.804,
|
| 578 |
-
"mean_token_accuracy": 0.7530029378831387,
|
| 579 |
-
"num_tokens": 211730.0,
|
| 580 |
-
"step": 57
|
| 581 |
-
},
|
| 582 |
-
{
|
| 583 |
-
"entropy": 1.2704328149557114,
|
| 584 |
-
"epoch": 3.2335766423357666,
|
| 585 |
-
"grad_norm": 3.46875,
|
| 586 |
-
"learning_rate": 1.2189280281214128e-05,
|
| 587 |
-
"loss": 0.7542,
|
| 588 |
-
"mean_token_accuracy": 0.775670263916254,
|
| 589 |
-
"num_tokens": 216415.0,
|
| 590 |
-
"step": 58
|
| 591 |
-
},
|
| 592 |
-
{
|
| 593 |
-
"entropy": 1.3555709198117256,
|
| 594 |
-
"epoch": 3.291970802919708,
|
| 595 |
-
"grad_norm": 3.9375,
|
| 596 |
-
"learning_rate": 1.1574736948340163e-05,
|
| 597 |
-
"loss": 0.7992,
|
| 598 |
-
"mean_token_accuracy": 0.7488890923559666,
|
| 599 |
-
"num_tokens": 219953.0,
|
| 600 |
-
"step": 59
|
| 601 |
-
},
|
| 602 |
-
{
|
| 603 |
-
"entropy": 1.2632866501808167,
|
| 604 |
-
"epoch": 3.3503649635036497,
|
| 605 |
-
"grad_norm": 3.578125,
|
| 606 |
-
"learning_rate": 1.0966202690771015e-05,
|
| 607 |
-
"loss": 0.75,
|
| 608 |
-
"mean_token_accuracy": 0.7654453739523888,
|
| 609 |
-
"num_tokens": 224335.0,
|
| 610 |
-
"step": 60
|
| 611 |
-
},
|
| 612 |
-
{
|
| 613 |
-
"entropy": 1.2773741334676743,
|
| 614 |
-
"epoch": 3.408759124087591,
|
| 615 |
-
"grad_norm": 4.125,
|
| 616 |
-
"learning_rate": 1.036474508437579e-05,
|
| 617 |
-
"loss": 0.8394,
|
| 618 |
-
"mean_token_accuracy": 0.7538279145956039,
|
| 619 |
-
"num_tokens": 228300.0,
|
| 620 |
-
"step": 61
|
| 621 |
-
},
|
| 622 |
-
{
|
| 623 |
-
"entropy": 1.2203935906291008,
|
| 624 |
-
"epoch": 3.4671532846715327,
|
| 625 |
-
"grad_norm": 4.3125,
|
| 626 |
-
"learning_rate": 9.771419290172776e-06,
|
| 627 |
-
"loss": 0.7866,
|
| 628 |
-
"mean_token_accuracy": 0.7759390734136105,
|
| 629 |
-
"num_tokens": 231820.0,
|
| 630 |
-
"step": 62
|
| 631 |
-
},
|
| 632 |
-
{
|
| 633 |
-
"entropy": 1.2281916178762913,
|
| 634 |
-
"epoch": 3.5255474452554747,
|
| 635 |
-
"grad_norm": 4.5,
|
| 636 |
-
"learning_rate": 9.187266203218457e-06,
|
| 637 |
-
"loss": 0.7456,
|
| 638 |
-
"mean_token_accuracy": 0.7896540127694607,
|
| 639 |
-
"num_tokens": 235502.0,
|
| 640 |
-
"step": 63
|
| 641 |
-
},
|
| 642 |
-
{
|
| 643 |
-
"entropy": 1.1479723155498505,
|
| 644 |
-
"epoch": 3.5839416058394162,
|
| 645 |
-
"grad_norm": 3.84375,
|
| 646 |
-
"learning_rate": 8.61331062652391e-06,
|
| 647 |
-
"loss": 0.6779,
|
| 648 |
-
"mean_token_accuracy": 0.7954859808087349,
|
| 649 |
-
"num_tokens": 239847.0,
|
| 650 |
-
"step": 64
|
| 651 |
-
},
|
| 652 |
-
{
|
| 653 |
-
"entropy": 1.227071214467287,
|
| 654 |
-
"epoch": 3.6423357664233578,
|
| 655 |
-
"grad_norm": 4.78125,
|
| 656 |
-
"learning_rate": 8.050559473202078e-06,
|
| 657 |
-
"loss": 0.7642,
|
| 658 |
-
"mean_token_accuracy": 0.7581925354897976,
|
| 659 |
-
"num_tokens": 243356.0,
|
| 660 |
-
"step": 65
|
| 661 |
-
},
|
| 662 |
-
{
|
| 663 |
-
"entropy": 1.131257489323616,
|
| 664 |
-
"epoch": 3.7007299270072993,
|
| 665 |
-
"grad_norm": 3.5625,
|
| 666 |
-
"learning_rate": 7.500000000000004e-06,
|
| 667 |
-
"loss": 0.7819,
|
| 668 |
-
"mean_token_accuracy": 0.7654204778373241,
|
| 669 |
-
"num_tokens": 249682.0,
|
| 670 |
-
"step": 66
|
| 671 |
-
},
|
| 672 |
-
{
|
| 673 |
-
"entropy": 1.16723557934165,
|
| 674 |
-
"epoch": 3.759124087591241,
|
| 675 |
-
"grad_norm": 4.5,
|
| 676 |
-
"learning_rate": 6.962598075315047e-06,
|
| 677 |
-
"loss": 0.6689,
|
| 678 |
-
"mean_token_accuracy": 0.783266007900238,
|
| 679 |
-
"num_tokens": 253238.0,
|
| 680 |
-
"step": 67
|
| 681 |
-
},
|
| 682 |
-
{
|
| 683 |
-
"entropy": 1.2070689871907234,
|
| 684 |
-
"epoch": 3.8175182481751824,
|
| 685 |
-
"grad_norm": 5.1875,
|
| 686 |
-
"learning_rate": 6.439296484733526e-06,
|
| 687 |
-
"loss": 0.7421,
|
| 688 |
-
"mean_token_accuracy": 0.7796755991876125,
|
| 689 |
-
"num_tokens": 256423.0,
|
| 690 |
-
"step": 68
|
| 691 |
-
},
|
| 692 |
-
{
|
| 693 |
-
"entropy": 1.1488405130803585,
|
| 694 |
-
"epoch": 3.875912408759124,
|
| 695 |
-
"grad_norm": 5.34375,
|
| 696 |
-
"learning_rate": 5.931013277064377e-06,
|
| 697 |
-
"loss": 0.7267,
|
| 698 |
-
"mean_token_accuracy": 0.7691169492900372,
|
| 699 |
-
"num_tokens": 259934.0,
|
| 700 |
-
"step": 69
|
| 701 |
-
},
|
| 702 |
-
{
|
| 703 |
-
"entropy": 1.130510926246643,
|
| 704 |
-
"epoch": 3.9343065693430654,
|
| 705 |
-
"grad_norm": 5.25,
|
| 706 |
-
"learning_rate": 5.438640153769654e-06,
|
| 707 |
-
"loss": 0.7209,
|
| 708 |
-
"mean_token_accuracy": 0.7871466726064682,
|
| 709 |
-
"num_tokens": 263187.0,
|
| 710 |
-
"step": 70
|
| 711 |
-
},
|
| 712 |
-
{
|
| 713 |
-
"entropy": 1.1477855034172535,
|
| 714 |
-
"epoch": 3.9927007299270074,
|
| 715 |
-
"grad_norm": 4.75,
|
| 716 |
-
"learning_rate": 4.963040904617131e-06,
|
| 717 |
-
"loss": 0.7762,
|
| 718 |
-
"mean_token_accuracy": 0.7656804099678993,
|
| 719 |
-
"num_tokens": 267097.0,
|
| 720 |
-
"step": 71
|
| 721 |
-
},
|
| 722 |
-
{
|
| 723 |
-
"entropy": 1.09878408908844,
|
| 724 |
-
"epoch": 4.0,
|
| 725 |
-
"grad_norm": 12.875,
|
| 726 |
-
"learning_rate": 4.505049892299517e-06,
|
| 727 |
-
"loss": 0.7072,
|
| 728 |
-
"mean_token_accuracy": 0.7617444694042206,
|
| 729 |
-
"num_tokens": 267588.0,
|
| 730 |
-
"step": 72
|
| 731 |
-
},
|
| 732 |
-
{
|
| 733 |
-
"entropy": 1.0318926461040974,
|
| 734 |
-
"epoch": 4.0583941605839415,
|
| 735 |
-
"grad_norm": 4.28125,
|
| 736 |
-
"learning_rate": 4.06547058867883e-06,
|
| 737 |
-
"loss": 0.5992,
|
| 738 |
-
"mean_token_accuracy": 0.8166146464645863,
|
| 739 |
-
"num_tokens": 271589.0,
|
| 740 |
-
"step": 73
|
| 741 |
-
},
|
| 742 |
-
{
|
| 743 |
-
"entropy": 1.1504660807549953,
|
| 744 |
-
"epoch": 4.116788321167883,
|
| 745 |
-
"grad_norm": 4.78125,
|
| 746 |
-
"learning_rate": 3.645074165223656e-06,
|
| 747 |
-
"loss": 0.606,
|
| 748 |
-
"mean_token_accuracy": 0.8282722532749176,
|
| 749 |
-
"num_tokens": 274468.0,
|
| 750 |
-
"step": 74
|
| 751 |
-
},
|
| 752 |
-
{
|
| 753 |
-
"entropy": 1.1046061255037785,
|
| 754 |
-
"epoch": 4.175182481751825,
|
| 755 |
-
"grad_norm": 3.671875,
|
| 756 |
-
"learning_rate": 3.244598140112404e-06,
|
| 757 |
-
"loss": 0.6325,
|
| 758 |
-
"mean_token_accuracy": 0.8047133162617683,
|
| 759 |
-
"num_tokens": 278830.0,
|
| 760 |
-
"step": 75
|
| 761 |
}
|
| 762 |
],
|
| 763 |
"logging_steps": 1,
|
|
@@ -777,7 +577,7 @@
|
|
| 777 |
"attributes": {}
|
| 778 |
}
|
| 779 |
},
|
| 780 |
-
"total_flos":
|
| 781 |
"train_batch_size": 2,
|
| 782 |
"trial_name": null,
|
| 783 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 3.0583941605839415,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 55,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 558 |
"mean_token_accuracy": 0.765391580760479,
|
| 559 |
"num_tokens": 204197.0,
|
| 560 |
"step": 55
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 561 |
}
|
| 562 |
],
|
| 563 |
"logging_steps": 1,
|
|
|
|
| 577 |
"attributes": {}
|
| 578 |
}
|
| 579 |
},
|
| 580 |
+
"total_flos": 5469020090400768.0,
|
| 581 |
"train_batch_size": 2,
|
| 582 |
"trial_name": null,
|
| 583 |
"trial_params": null
|