Upload folder using huggingface_hub
Browse files- adapter_model.safetensors +1 -1
- optimizer.pt +1 -1
- rng_state.pth +1 -1
- scheduler.pt +1 -1
- trainer_state.json +7 -307
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 369133600
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:213b096de341a50e0a4985859c6ea12250ce2d41200faa95746f3833d77778ba
|
| 3 |
size 369133600
|
optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 738413771
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:96b2ad4cb92f0741196486f703f3eac971910ec2af67ea8dced1c39144ae41bd
|
| 3 |
size 738413771
|
rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a98db1d36dba143442348323cec201512ec884401e693908cf5136c8d261220e
|
| 3 |
size 14645
|
scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:62f9a2cf5827a9b18dca293c2a36bb613a8624ee231ceb47fae6e66a15cfcac4
|
| 3 |
size 1465
|
trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"best_global_step":
|
| 3 |
-
"best_metric": 0.
|
| 4 |
-
"best_model_checkpoint": "/content/drive/MyDrive/lora_model/outputs/task15_microsoft/Phi-4-mini-instruct/checkpoint-
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 1,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -608,306 +608,6 @@
|
|
| 608 |
"eval_samples_per_second": 8.91,
|
| 609 |
"eval_steps_per_second": 1.188,
|
| 610 |
"step": 40
|
| 611 |
-
},
|
| 612 |
-
{
|
| 613 |
-
"epoch": 2.1578947368421053,
|
| 614 |
-
"grad_norm": 0.4683343768119812,
|
| 615 |
-
"learning_rate": 0.00020658795558326743,
|
| 616 |
-
"loss": 0.7401,
|
| 617 |
-
"step": 41
|
| 618 |
-
},
|
| 619 |
-
{
|
| 620 |
-
"epoch": 2.1578947368421053,
|
| 621 |
-
"eval_loss": 0.7358477711677551,
|
| 622 |
-
"eval_runtime": 3.3619,
|
| 623 |
-
"eval_samples_per_second": 8.924,
|
| 624 |
-
"eval_steps_per_second": 1.19,
|
| 625 |
-
"step": 41
|
| 626 |
-
},
|
| 627 |
-
{
|
| 628 |
-
"epoch": 2.2105263157894735,
|
| 629 |
-
"grad_norm": 0.6029678583145142,
|
| 630 |
-
"learning_rate": 0.0001895195261000831,
|
| 631 |
-
"loss": 0.6829,
|
| 632 |
-
"step": 42
|
| 633 |
-
},
|
| 634 |
-
{
|
| 635 |
-
"epoch": 2.2105263157894735,
|
| 636 |
-
"eval_loss": 0.7268175482749939,
|
| 637 |
-
"eval_runtime": 3.3732,
|
| 638 |
-
"eval_samples_per_second": 8.894,
|
| 639 |
-
"eval_steps_per_second": 1.186,
|
| 640 |
-
"step": 42
|
| 641 |
-
},
|
| 642 |
-
{
|
| 643 |
-
"epoch": 2.263157894736842,
|
| 644 |
-
"grad_norm": 0.6847506761550903,
|
| 645 |
-
"learning_rate": 0.00017274575140626317,
|
| 646 |
-
"loss": 0.7923,
|
| 647 |
-
"step": 43
|
| 648 |
-
},
|
| 649 |
-
{
|
| 650 |
-
"epoch": 2.263157894736842,
|
| 651 |
-
"eval_loss": 0.7140093445777893,
|
| 652 |
-
"eval_runtime": 3.3766,
|
| 653 |
-
"eval_samples_per_second": 8.885,
|
| 654 |
-
"eval_steps_per_second": 1.185,
|
| 655 |
-
"step": 43
|
| 656 |
-
},
|
| 657 |
-
{
|
| 658 |
-
"epoch": 2.3157894736842106,
|
| 659 |
-
"grad_norm": 0.6127113699913025,
|
| 660 |
-
"learning_rate": 0.00015634835164602198,
|
| 661 |
-
"loss": 0.7396,
|
| 662 |
-
"step": 44
|
| 663 |
-
},
|
| 664 |
-
{
|
| 665 |
-
"epoch": 2.3157894736842106,
|
| 666 |
-
"eval_loss": 0.6983242034912109,
|
| 667 |
-
"eval_runtime": 3.3684,
|
| 668 |
-
"eval_samples_per_second": 8.906,
|
| 669 |
-
"eval_steps_per_second": 1.188,
|
| 670 |
-
"step": 44
|
| 671 |
-
},
|
| 672 |
-
{
|
| 673 |
-
"epoch": 2.3684210526315788,
|
| 674 |
-
"grad_norm": 0.538176953792572,
|
| 675 |
-
"learning_rate": 0.00014040721330273062,
|
| 676 |
-
"loss": 0.6553,
|
| 677 |
-
"step": 45
|
| 678 |
-
},
|
| 679 |
-
{
|
| 680 |
-
"epoch": 2.3684210526315788,
|
| 681 |
-
"eval_loss": 0.6850975155830383,
|
| 682 |
-
"eval_runtime": 3.3723,
|
| 683 |
-
"eval_samples_per_second": 8.896,
|
| 684 |
-
"eval_steps_per_second": 1.186,
|
| 685 |
-
"step": 45
|
| 686 |
-
},
|
| 687 |
-
{
|
| 688 |
-
"epoch": 2.4210526315789473,
|
| 689 |
-
"grad_norm": 0.6419486999511719,
|
| 690 |
-
"learning_rate": 0.00012500000000000006,
|
| 691 |
-
"loss": 0.7364,
|
| 692 |
-
"step": 46
|
| 693 |
-
},
|
| 694 |
-
{
|
| 695 |
-
"epoch": 2.4210526315789473,
|
| 696 |
-
"eval_loss": 0.6766163110733032,
|
| 697 |
-
"eval_runtime": 3.3706,
|
| 698 |
-
"eval_samples_per_second": 8.9,
|
| 699 |
-
"eval_steps_per_second": 1.187,
|
| 700 |
-
"step": 46
|
| 701 |
-
},
|
| 702 |
-
{
|
| 703 |
-
"epoch": 2.473684210526316,
|
| 704 |
-
"grad_norm": 0.5997453331947327,
|
| 705 |
-
"learning_rate": 0.00011020177413231333,
|
| 706 |
-
"loss": 0.6901,
|
| 707 |
-
"step": 47
|
| 708 |
-
},
|
| 709 |
-
{
|
| 710 |
-
"epoch": 2.473684210526316,
|
| 711 |
-
"eval_loss": 0.667664110660553,
|
| 712 |
-
"eval_runtime": 3.3701,
|
| 713 |
-
"eval_samples_per_second": 8.902,
|
| 714 |
-
"eval_steps_per_second": 1.187,
|
| 715 |
-
"step": 47
|
| 716 |
-
},
|
| 717 |
-
{
|
| 718 |
-
"epoch": 2.526315789473684,
|
| 719 |
-
"grad_norm": 0.5617692470550537,
|
| 720 |
-
"learning_rate": 9.608463116858542e-05,
|
| 721 |
-
"loss": 0.6299,
|
| 722 |
-
"step": 48
|
| 723 |
-
},
|
| 724 |
-
{
|
| 725 |
-
"epoch": 2.526315789473684,
|
| 726 |
-
"eval_loss": 0.658656895160675,
|
| 727 |
-
"eval_runtime": 3.3698,
|
| 728 |
-
"eval_samples_per_second": 8.903,
|
| 729 |
-
"eval_steps_per_second": 1.187,
|
| 730 |
-
"step": 48
|
| 731 |
-
},
|
| 732 |
-
{
|
| 733 |
-
"epoch": 2.5789473684210527,
|
| 734 |
-
"grad_norm": 0.5850865840911865,
|
| 735 |
-
"learning_rate": 8.271734841028553e-05,
|
| 736 |
-
"loss": 0.717,
|
| 737 |
-
"step": 49
|
| 738 |
-
},
|
| 739 |
-
{
|
| 740 |
-
"epoch": 2.5789473684210527,
|
| 741 |
-
"eval_loss": 0.6522302627563477,
|
| 742 |
-
"eval_runtime": 3.3689,
|
| 743 |
-
"eval_samples_per_second": 8.905,
|
| 744 |
-
"eval_steps_per_second": 1.187,
|
| 745 |
-
"step": 49
|
| 746 |
-
},
|
| 747 |
-
{
|
| 748 |
-
"epoch": 2.6315789473684212,
|
| 749 |
-
"grad_norm": 0.5645343661308289,
|
| 750 |
-
"learning_rate": 7.016504991533726e-05,
|
| 751 |
-
"loss": 0.6396,
|
| 752 |
-
"step": 50
|
| 753 |
-
},
|
| 754 |
-
{
|
| 755 |
-
"epoch": 2.6315789473684212,
|
| 756 |
-
"eval_loss": 0.6460065841674805,
|
| 757 |
-
"eval_runtime": 3.3689,
|
| 758 |
-
"eval_samples_per_second": 8.905,
|
| 759 |
-
"eval_steps_per_second": 1.187,
|
| 760 |
-
"step": 50
|
| 761 |
-
},
|
| 762 |
-
{
|
| 763 |
-
"epoch": 2.6842105263157894,
|
| 764 |
-
"grad_norm": 0.6689581871032715,
|
| 765 |
-
"learning_rate": 5.848888922025553e-05,
|
| 766 |
-
"loss": 0.6803,
|
| 767 |
-
"step": 51
|
| 768 |
-
},
|
| 769 |
-
{
|
| 770 |
-
"epoch": 2.6842105263157894,
|
| 771 |
-
"eval_loss": 0.6413608193397522,
|
| 772 |
-
"eval_runtime": 3.3668,
|
| 773 |
-
"eval_samples_per_second": 8.911,
|
| 774 |
-
"eval_steps_per_second": 1.188,
|
| 775 |
-
"step": 51
|
| 776 |
-
},
|
| 777 |
-
{
|
| 778 |
-
"epoch": 2.736842105263158,
|
| 779 |
-
"grad_norm": 0.5473130941390991,
|
| 780 |
-
"learning_rate": 4.7745751406263163e-05,
|
| 781 |
-
"loss": 0.6535,
|
| 782 |
-
"step": 52
|
| 783 |
-
},
|
| 784 |
-
{
|
| 785 |
-
"epoch": 2.736842105263158,
|
| 786 |
-
"eval_loss": 0.6371581554412842,
|
| 787 |
-
"eval_runtime": 3.371,
|
| 788 |
-
"eval_samples_per_second": 8.9,
|
| 789 |
-
"eval_steps_per_second": 1.187,
|
| 790 |
-
"step": 52
|
| 791 |
-
},
|
| 792 |
-
{
|
| 793 |
-
"epoch": 2.7894736842105265,
|
| 794 |
-
"grad_norm": 0.6696008443832397,
|
| 795 |
-
"learning_rate": 3.798797596089351e-05,
|
| 796 |
-
"loss": 0.744,
|
| 797 |
-
"step": 53
|
| 798 |
-
},
|
| 799 |
-
{
|
| 800 |
-
"epoch": 2.7894736842105265,
|
| 801 |
-
"eval_loss": 0.6349052786827087,
|
| 802 |
-
"eval_runtime": 3.3769,
|
| 803 |
-
"eval_samples_per_second": 8.884,
|
| 804 |
-
"eval_steps_per_second": 1.185,
|
| 805 |
-
"step": 53
|
| 806 |
-
},
|
| 807 |
-
{
|
| 808 |
-
"epoch": 2.8421052631578947,
|
| 809 |
-
"grad_norm": 0.5501115918159485,
|
| 810 |
-
"learning_rate": 2.9263101785268254e-05,
|
| 811 |
-
"loss": 0.6701,
|
| 812 |
-
"step": 54
|
| 813 |
-
},
|
| 814 |
-
{
|
| 815 |
-
"epoch": 2.8421052631578947,
|
| 816 |
-
"eval_loss": 0.6328269839286804,
|
| 817 |
-
"eval_runtime": 3.3789,
|
| 818 |
-
"eval_samples_per_second": 8.879,
|
| 819 |
-
"eval_steps_per_second": 1.184,
|
| 820 |
-
"step": 54
|
| 821 |
-
},
|
| 822 |
-
{
|
| 823 |
-
"epoch": 2.8947368421052633,
|
| 824 |
-
"grad_norm": 0.4939638078212738,
|
| 825 |
-
"learning_rate": 2.1613635589349755e-05,
|
| 826 |
-
"loss": 0.6646,
|
| 827 |
-
"step": 55
|
| 828 |
-
},
|
| 829 |
-
{
|
| 830 |
-
"epoch": 2.8947368421052633,
|
| 831 |
-
"eval_loss": 0.6300433278083801,
|
| 832 |
-
"eval_runtime": 3.3726,
|
| 833 |
-
"eval_samples_per_second": 8.895,
|
| 834 |
-
"eval_steps_per_second": 1.186,
|
| 835 |
-
"step": 55
|
| 836 |
-
},
|
| 837 |
-
{
|
| 838 |
-
"epoch": 2.9473684210526314,
|
| 839 |
-
"grad_norm": 0.61789470911026,
|
| 840 |
-
"learning_rate": 1.5076844803522921e-05,
|
| 841 |
-
"loss": 0.7293,
|
| 842 |
-
"step": 56
|
| 843 |
-
},
|
| 844 |
-
{
|
| 845 |
-
"epoch": 2.9473684210526314,
|
| 846 |
-
"eval_loss": 0.6280367374420166,
|
| 847 |
-
"eval_runtime": 3.3765,
|
| 848 |
-
"eval_samples_per_second": 8.885,
|
| 849 |
-
"eval_steps_per_second": 1.185,
|
| 850 |
-
"step": 56
|
| 851 |
-
},
|
| 852 |
-
{
|
| 853 |
-
"epoch": 3.0,
|
| 854 |
-
"grad_norm": 0.5384323000907898,
|
| 855 |
-
"learning_rate": 9.684576015420277e-06,
|
| 856 |
-
"loss": 0.6898,
|
| 857 |
-
"step": 57
|
| 858 |
-
},
|
| 859 |
-
{
|
| 860 |
-
"epoch": 3.0,
|
| 861 |
-
"eval_loss": 0.6272571086883545,
|
| 862 |
-
"eval_runtime": 3.3704,
|
| 863 |
-
"eval_samples_per_second": 8.901,
|
| 864 |
-
"eval_steps_per_second": 1.187,
|
| 865 |
-
"step": 57
|
| 866 |
-
},
|
| 867 |
-
{
|
| 868 |
-
"epoch": 3.0526315789473686,
|
| 869 |
-
"grad_norm": 0.5376719236373901,
|
| 870 |
-
"learning_rate": 5.463099816548578e-06,
|
| 871 |
-
"loss": 0.6448,
|
| 872 |
-
"step": 58
|
| 873 |
-
},
|
| 874 |
-
{
|
| 875 |
-
"epoch": 3.0526315789473686,
|
| 876 |
-
"eval_loss": 0.6269372701644897,
|
| 877 |
-
"eval_runtime": 3.3667,
|
| 878 |
-
"eval_samples_per_second": 8.911,
|
| 879 |
-
"eval_steps_per_second": 1.188,
|
| 880 |
-
"step": 58
|
| 881 |
-
},
|
| 882 |
-
{
|
| 883 |
-
"epoch": 3.1052631578947367,
|
| 884 |
-
"grad_norm": 0.515751838684082,
|
| 885 |
-
"learning_rate": 2.4329828146074094e-06,
|
| 886 |
-
"loss": 0.7027,
|
| 887 |
-
"step": 59
|
| 888 |
-
},
|
| 889 |
-
{
|
| 890 |
-
"epoch": 3.1052631578947367,
|
| 891 |
-
"eval_loss": 0.6262253522872925,
|
| 892 |
-
"eval_runtime": 3.3654,
|
| 893 |
-
"eval_samples_per_second": 8.914,
|
| 894 |
-
"eval_steps_per_second": 1.189,
|
| 895 |
-
"step": 59
|
| 896 |
-
},
|
| 897 |
-
{
|
| 898 |
-
"epoch": 3.1578947368421053,
|
| 899 |
-
"grad_norm": 0.5370931029319763,
|
| 900 |
-
"learning_rate": 6.089874350439506e-07,
|
| 901 |
-
"loss": 0.6594,
|
| 902 |
-
"step": 60
|
| 903 |
-
},
|
| 904 |
-
{
|
| 905 |
-
"epoch": 3.1578947368421053,
|
| 906 |
-
"eval_loss": 0.6268424391746521,
|
| 907 |
-
"eval_runtime": 3.3618,
|
| 908 |
-
"eval_samples_per_second": 8.924,
|
| 909 |
-
"eval_steps_per_second": 1.19,
|
| 910 |
-
"step": 60
|
| 911 |
}
|
| 912 |
],
|
| 913 |
"logging_steps": 1,
|
|
@@ -922,12 +622,12 @@
|
|
| 922 |
"should_evaluate": false,
|
| 923 |
"should_log": false,
|
| 924 |
"should_save": true,
|
| 925 |
-
"should_training_stop":
|
| 926 |
},
|
| 927 |
"attributes": {}
|
| 928 |
}
|
| 929 |
},
|
| 930 |
-
"total_flos":
|
| 931 |
"train_batch_size": 1,
|
| 932 |
"trial_name": null,
|
| 933 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_global_step": 40,
|
| 3 |
+
"best_metric": 0.7394412159919739,
|
| 4 |
+
"best_model_checkpoint": "/content/drive/MyDrive/lora_model/outputs/task15_microsoft/Phi-4-mini-instruct/checkpoint-40",
|
| 5 |
+
"epoch": 2.1052631578947367,
|
| 6 |
"eval_steps": 1,
|
| 7 |
+
"global_step": 40,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 608 |
"eval_samples_per_second": 8.91,
|
| 609 |
"eval_steps_per_second": 1.188,
|
| 610 |
"step": 40
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 611 |
}
|
| 612 |
],
|
| 613 |
"logging_steps": 1,
|
|
|
|
| 622 |
"should_evaluate": false,
|
| 623 |
"should_log": false,
|
| 624 |
"should_save": true,
|
| 625 |
+
"should_training_stop": false
|
| 626 |
},
|
| 627 |
"attributes": {}
|
| 628 |
}
|
| 629 |
},
|
| 630 |
+
"total_flos": 1661495727175680.0,
|
| 631 |
"train_batch_size": 1,
|
| 632 |
"trial_name": null,
|
| 633 |
"trial_params": null
|