Upload folder using huggingface_hub
Browse files- adapter_model.safetensors +1 -1
- optimizer.pt +1 -1
- rng_state.pth +1 -1
- scheduler.pt +1 -1
- trainer_state.json +4 -344
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 369134112
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4b98ba6ac806c03c0409f8d783327298917bd9290b863f004e8c9f4949a49cab
|
| 3 |
size 369134112
|
optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 738417355
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7750a024ffc36f0b2b3d75b6d23a4abc45828022cd9fc314ed0ca873e7afc478
|
| 3 |
size 738417355
|
rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:de69a2834426ff9ef8199d077e00892579278af31d8969d77f98235b5cfc010a
|
| 3 |
size 14645
|
scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c2b8b314158649523e5cd4cc114f7b492743419645cb17f66610bf7539ffeb77
|
| 3 |
size 1465
|
trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -658,346 +658,6 @@
|
|
| 658 |
"mean_token_accuracy": 0.7686784416437149,
|
| 659 |
"num_tokens": 116981.0,
|
| 660 |
"step": 65
|
| 661 |
-
},
|
| 662 |
-
{
|
| 663 |
-
"entropy": 1.2168401956558228,
|
| 664 |
-
"epoch": 2.0,
|
| 665 |
-
"grad_norm": 18.375,
|
| 666 |
-
"learning_rate": 1.0578672383836437e-05,
|
| 667 |
-
"loss": 1.1399,
|
| 668 |
-
"mean_token_accuracy": 0.6772964239120484,
|
| 669 |
-
"num_tokens": 118284.0,
|
| 670 |
-
"step": 66
|
| 671 |
-
},
|
| 672 |
-
{
|
| 673 |
-
"entropy": 1.0973209738731384,
|
| 674 |
-
"epoch": 2.0306513409961684,
|
| 675 |
-
"grad_norm": 7.8125,
|
| 676 |
-
"learning_rate": 1.0045814070672498e-05,
|
| 677 |
-
"loss": 0.3245,
|
| 678 |
-
"mean_token_accuracy": 0.9032263904809952,
|
| 679 |
-
"num_tokens": 119663.0,
|
| 680 |
-
"step": 67
|
| 681 |
-
},
|
| 682 |
-
{
|
| 683 |
-
"entropy": 1.053741380572319,
|
| 684 |
-
"epoch": 2.0613026819923372,
|
| 685 |
-
"grad_norm": 6.0,
|
| 686 |
-
"learning_rate": 9.519884634504074e-06,
|
| 687 |
-
"loss": 0.2808,
|
| 688 |
-
"mean_token_accuracy": 0.9356953203678131,
|
| 689 |
-
"num_tokens": 121476.0,
|
| 690 |
-
"step": 68
|
| 691 |
-
},
|
| 692 |
-
{
|
| 693 |
-
"entropy": 0.9946238845586777,
|
| 694 |
-
"epoch": 2.0919540229885056,
|
| 695 |
-
"grad_norm": 5.375,
|
| 696 |
-
"learning_rate": 9.001619635203889e-06,
|
| 697 |
-
"loss": 0.2809,
|
| 698 |
-
"mean_token_accuracy": 0.9175683632493019,
|
| 699 |
-
"num_tokens": 123792.0,
|
| 700 |
-
"step": 69
|
| 701 |
-
},
|
| 702 |
-
{
|
| 703 |
-
"entropy": 1.015475258231163,
|
| 704 |
-
"epoch": 2.1226053639846745,
|
| 705 |
-
"grad_norm": 6.65625,
|
| 706 |
-
"learning_rate": 8.491743913236629e-06,
|
| 707 |
-
"loss": 0.2802,
|
| 708 |
-
"mean_token_accuracy": 0.9311554208397865,
|
| 709 |
-
"num_tokens": 125329.0,
|
| 710 |
-
"step": 70
|
| 711 |
-
},
|
| 712 |
-
{
|
| 713 |
-
"entropy": 0.9921716600656509,
|
| 714 |
-
"epoch": 2.153256704980843,
|
| 715 |
-
"grad_norm": 6.78125,
|
| 716 |
-
"learning_rate": 7.99097057590407e-06,
|
| 717 |
-
"loss": 0.2807,
|
| 718 |
-
"mean_token_accuracy": 0.9192091822624207,
|
| 719 |
-
"num_tokens": 126654.0,
|
| 720 |
-
"step": 71
|
| 721 |
-
},
|
| 722 |
-
{
|
| 723 |
-
"entropy": 0.8778632581233978,
|
| 724 |
-
"epoch": 2.1839080459770113,
|
| 725 |
-
"grad_norm": 6.09375,
|
| 726 |
-
"learning_rate": 7.500000000000004e-06,
|
| 727 |
-
"loss": 0.2776,
|
| 728 |
-
"mean_token_accuracy": 0.9309542253613472,
|
| 729 |
-
"num_tokens": 128629.0,
|
| 730 |
-
"step": 72
|
| 731 |
-
},
|
| 732 |
-
{
|
| 733 |
-
"entropy": 0.953188918530941,
|
| 734 |
-
"epoch": 2.21455938697318,
|
| 735 |
-
"grad_norm": 8.6875,
|
| 736 |
-
"learning_rate": 7.019518852269953e-06,
|
| 737 |
-
"loss": 0.4596,
|
| 738 |
-
"mean_token_accuracy": 0.8634384647011757,
|
| 739 |
-
"num_tokens": 130344.0,
|
| 740 |
-
"step": 73
|
| 741 |
-
},
|
| 742 |
-
{
|
| 743 |
-
"entropy": 0.8518025800585747,
|
| 744 |
-
"epoch": 2.2452107279693485,
|
| 745 |
-
"grad_norm": 7.46875,
|
| 746 |
-
"learning_rate": 6.55019912904567e-06,
|
| 747 |
-
"loss": 0.3006,
|
| 748 |
-
"mean_token_accuracy": 0.9241785854101181,
|
| 749 |
-
"num_tokens": 132152.0,
|
| 750 |
-
"step": 74
|
| 751 |
-
},
|
| 752 |
-
{
|
| 753 |
-
"entropy": 0.8467591479420662,
|
| 754 |
-
"epoch": 2.2758620689655173,
|
| 755 |
-
"grad_norm": 6.40625,
|
| 756 |
-
"learning_rate": 6.092697216397478e-06,
|
| 757 |
-
"loss": 0.2682,
|
| 758 |
-
"mean_token_accuracy": 0.9179906323552132,
|
| 759 |
-
"num_tokens": 134144.0,
|
| 760 |
-
"step": 75
|
| 761 |
-
},
|
| 762 |
-
{
|
| 763 |
-
"entropy": 0.7837551906704903,
|
| 764 |
-
"epoch": 2.3065134099616857,
|
| 765 |
-
"grad_norm": 7.25,
|
| 766 |
-
"learning_rate": 5.647652972118998e-06,
|
| 767 |
-
"loss": 0.3422,
|
| 768 |
-
"mean_token_accuracy": 0.8964523077011108,
|
| 769 |
-
"num_tokens": 136715.0,
|
| 770 |
-
"step": 76
|
| 771 |
-
},
|
| 772 |
-
{
|
| 773 |
-
"entropy": 0.7817510291934013,
|
| 774 |
-
"epoch": 2.3371647509578546,
|
| 775 |
-
"grad_norm": 7.25,
|
| 776 |
-
"learning_rate": 5.2156888308281875e-06,
|
| 777 |
-
"loss": 0.2678,
|
| 778 |
-
"mean_token_accuracy": 0.9292137995362282,
|
| 779 |
-
"num_tokens": 138907.0,
|
| 780 |
-
"step": 77
|
| 781 |
-
},
|
| 782 |
-
{
|
| 783 |
-
"entropy": 0.7645558379590511,
|
| 784 |
-
"epoch": 2.367816091954023,
|
| 785 |
-
"grad_norm": 7.6875,
|
| 786 |
-
"learning_rate": 4.797408933436207e-06,
|
| 787 |
-
"loss": 0.2069,
|
| 788 |
-
"mean_token_accuracy": 0.9325998574495316,
|
| 789 |
-
"num_tokens": 140536.0,
|
| 790 |
-
"step": 78
|
| 791 |
-
},
|
| 792 |
-
{
|
| 793 |
-
"entropy": 0.756471686065197,
|
| 794 |
-
"epoch": 2.3984674329501914,
|
| 795 |
-
"grad_norm": 8.6875,
|
| 796 |
-
"learning_rate": 4.393398282201788e-06,
|
| 797 |
-
"loss": 0.2288,
|
| 798 |
-
"mean_token_accuracy": 0.924439363181591,
|
| 799 |
-
"num_tokens": 142205.0,
|
| 800 |
-
"step": 79
|
| 801 |
-
},
|
| 802 |
-
{
|
| 803 |
-
"entropy": 0.7203860953450203,
|
| 804 |
-
"epoch": 2.42911877394636,
|
| 805 |
-
"grad_norm": 8.75,
|
| 806 |
-
"learning_rate": 4.004221922552608e-06,
|
| 807 |
-
"loss": 0.3023,
|
| 808 |
-
"mean_token_accuracy": 0.9196523949503899,
|
| 809 |
-
"num_tokens": 143937.0,
|
| 810 |
-
"step": 80
|
| 811 |
-
},
|
| 812 |
-
{
|
| 813 |
-
"entropy": 0.7062718719244003,
|
| 814 |
-
"epoch": 2.4597701149425286,
|
| 815 |
-
"grad_norm": 8.3125,
|
| 816 |
-
"learning_rate": 3.630424152818203e-06,
|
| 817 |
-
"loss": 0.242,
|
| 818 |
-
"mean_token_accuracy": 0.9289174377918243,
|
| 819 |
-
"num_tokens": 145867.0,
|
| 820 |
-
"step": 81
|
| 821 |
-
},
|
| 822 |
-
{
|
| 823 |
-
"entropy": 0.7174801900982857,
|
| 824 |
-
"epoch": 2.4904214559386975,
|
| 825 |
-
"grad_norm": 10.0625,
|
| 826 |
-
"learning_rate": 3.272527762979553e-06,
|
| 827 |
-
"loss": 0.3277,
|
| 828 |
-
"mean_token_accuracy": 0.9081463739275932,
|
| 829 |
-
"num_tokens": 147522.0,
|
| 830 |
-
"step": 82
|
| 831 |
-
},
|
| 832 |
-
{
|
| 833 |
-
"entropy": 0.7576407790184021,
|
| 834 |
-
"epoch": 2.521072796934866,
|
| 835 |
-
"grad_norm": 10.5,
|
| 836 |
-
"learning_rate": 2.931033303499975e-06,
|
| 837 |
-
"loss": 0.2869,
|
| 838 |
-
"mean_token_accuracy": 0.9234072640538216,
|
| 839 |
-
"num_tokens": 149154.0,
|
| 840 |
-
"step": 83
|
| 841 |
-
},
|
| 842 |
-
{
|
| 843 |
-
"entropy": 0.6603295132517815,
|
| 844 |
-
"epoch": 2.5517241379310347,
|
| 845 |
-
"grad_norm": 8.5,
|
| 846 |
-
"learning_rate": 2.60641838526008e-06,
|
| 847 |
-
"loss": 0.2954,
|
| 848 |
-
"mean_token_accuracy": 0.9192768260836601,
|
| 849 |
-
"num_tokens": 151443.0,
|
| 850 |
-
"step": 84
|
| 851 |
-
},
|
| 852 |
-
{
|
| 853 |
-
"entropy": 0.7209493666887283,
|
| 854 |
-
"epoch": 2.582375478927203,
|
| 855 |
-
"grad_norm": 7.625,
|
| 856 |
-
"learning_rate": 2.2991370115757383e-06,
|
| 857 |
-
"loss": 0.2553,
|
| 858 |
-
"mean_token_accuracy": 0.9288515150547028,
|
| 859 |
-
"num_tokens": 153346.0,
|
| 860 |
-
"step": 85
|
| 861 |
-
},
|
| 862 |
-
{
|
| 863 |
-
"entropy": 0.7502265051007271,
|
| 864 |
-
"epoch": 2.6130268199233715,
|
| 865 |
-
"grad_norm": 10.0625,
|
| 866 |
-
"learning_rate": 2.0096189432334194e-06,
|
| 867 |
-
"loss": 0.2759,
|
| 868 |
-
"mean_token_accuracy": 0.9101333618164062,
|
| 869 |
-
"num_tokens": 155041.0,
|
| 870 |
-
"step": 86
|
| 871 |
-
},
|
| 872 |
-
{
|
| 873 |
-
"entropy": 0.6479271687567234,
|
| 874 |
-
"epoch": 2.6436781609195403,
|
| 875 |
-
"grad_norm": 7.65625,
|
| 876 |
-
"learning_rate": 1.7382690974308551e-06,
|
| 877 |
-
"loss": 0.1765,
|
| 878 |
-
"mean_token_accuracy": 0.9528548792004585,
|
| 879 |
-
"num_tokens": 156508.0,
|
| 880 |
-
"step": 87
|
| 881 |
-
},
|
| 882 |
-
{
|
| 883 |
-
"entropy": 0.686508409678936,
|
| 884 |
-
"epoch": 2.6743295019157087,
|
| 885 |
-
"grad_norm": 6.5625,
|
| 886 |
-
"learning_rate": 1.4854669814637145e-06,
|
| 887 |
-
"loss": 0.1907,
|
| 888 |
-
"mean_token_accuracy": 0.9471124485135078,
|
| 889 |
-
"num_tokens": 158506.0,
|
| 890 |
-
"step": 88
|
| 891 |
-
},
|
| 892 |
-
{
|
| 893 |
-
"entropy": 0.6940162889659405,
|
| 894 |
-
"epoch": 2.704980842911877,
|
| 895 |
-
"grad_norm": 7.0625,
|
| 896 |
-
"learning_rate": 1.2515661619503572e-06,
|
| 897 |
-
"loss": 0.2139,
|
| 898 |
-
"mean_token_accuracy": 0.9348281025886536,
|
| 899 |
-
"num_tokens": 160511.0,
|
| 900 |
-
"step": 89
|
| 901 |
-
},
|
| 902 |
-
{
|
| 903 |
-
"entropy": 0.7100252062082291,
|
| 904 |
-
"epoch": 2.735632183908046,
|
| 905 |
-
"grad_norm": 9.0625,
|
| 906 |
-
"learning_rate": 1.036893770336938e-06,
|
| 907 |
-
"loss": 0.2846,
|
| 908 |
-
"mean_token_accuracy": 0.9120082557201385,
|
| 909 |
-
"num_tokens": 162548.0,
|
| 910 |
-
"step": 90
|
| 911 |
-
},
|
| 912 |
-
{
|
| 913 |
-
"entropy": 0.689895510673523,
|
| 914 |
-
"epoch": 2.766283524904215,
|
| 915 |
-
"grad_norm": 7.59375,
|
| 916 |
-
"learning_rate": 8.417500453744864e-07,
|
| 917 |
-
"loss": 0.2794,
|
| 918 |
-
"mean_token_accuracy": 0.9187788665294647,
|
| 919 |
-
"num_tokens": 164874.0,
|
| 920 |
-
"step": 91
|
| 921 |
-
},
|
| 922 |
-
{
|
| 923 |
-
"entropy": 0.6664801873266697,
|
| 924 |
-
"epoch": 2.796934865900383,
|
| 925 |
-
"grad_norm": 7.96875,
|
| 926 |
-
"learning_rate": 6.664079132078881e-07,
|
| 927 |
-
"loss": 0.199,
|
| 928 |
-
"mean_token_accuracy": 0.94305020570755,
|
| 929 |
-
"num_tokens": 166614.0,
|
| 930 |
-
"step": 92
|
| 931 |
-
},
|
| 932 |
-
{
|
| 933 |
-
"entropy": 0.7356143966317177,
|
| 934 |
-
"epoch": 2.8275862068965516,
|
| 935 |
-
"grad_norm": 29.25,
|
| 936 |
-
"learning_rate": 5.11112605663977e-07,
|
| 937 |
-
"loss": 0.3566,
|
| 938 |
-
"mean_token_accuracy": 0.8869450762867928,
|
| 939 |
-
"num_tokens": 168220.0,
|
| 940 |
-
"step": 93
|
| 941 |
-
},
|
| 942 |
-
{
|
| 943 |
-
"entropy": 0.7260653525590897,
|
| 944 |
-
"epoch": 2.8582375478927204,
|
| 945 |
-
"grad_norm": 12.0625,
|
| 946 |
-
"learning_rate": 3.760813172726457e-07,
|
| 947 |
-
"loss": 0.2395,
|
| 948 |
-
"mean_token_accuracy": 0.9347701147198677,
|
| 949 |
-
"num_tokens": 169540.0,
|
| 950 |
-
"step": 94
|
| 951 |
-
},
|
| 952 |
-
{
|
| 953 |
-
"entropy": 0.6620675958693027,
|
| 954 |
-
"epoch": 2.888888888888889,
|
| 955 |
-
"grad_norm": 7.3125,
|
| 956 |
-
"learning_rate": 2.6150290150067593e-07,
|
| 957 |
-
"loss": 0.2358,
|
| 958 |
-
"mean_token_accuracy": 0.9333521574735641,
|
| 959 |
-
"num_tokens": 171709.0,
|
| 960 |
-
"step": 95
|
| 961 |
-
},
|
| 962 |
-
{
|
| 963 |
-
"entropy": 0.6657432429492474,
|
| 964 |
-
"epoch": 2.9195402298850572,
|
| 965 |
-
"grad_norm": 9.375,
|
| 966 |
-
"learning_rate": 1.6753760662307217e-07,
|
| 967 |
-
"loss": 0.2499,
|
| 968 |
-
"mean_token_accuracy": 0.9248412474989891,
|
| 969 |
-
"num_tokens": 173432.0,
|
| 970 |
-
"step": 96
|
| 971 |
-
},
|
| 972 |
-
{
|
| 973 |
-
"entropy": 0.6610175892710686,
|
| 974 |
-
"epoch": 2.950191570881226,
|
| 975 |
-
"grad_norm": 10.3125,
|
| 976 |
-
"learning_rate": 9.431685160136094e-08,
|
| 977 |
-
"loss": 0.2274,
|
| 978 |
-
"mean_token_accuracy": 0.9352346211671829,
|
| 979 |
-
"num_tokens": 174962.0,
|
| 980 |
-
"step": 97
|
| 981 |
-
},
|
| 982 |
-
{
|
| 983 |
-
"entropy": 0.6855079308152199,
|
| 984 |
-
"epoch": 2.9808429118773945,
|
| 985 |
-
"grad_norm": 9.4375,
|
| 986 |
-
"learning_rate": 4.194304228229806e-08,
|
| 987 |
-
"loss": 0.2806,
|
| 988 |
-
"mean_token_accuracy": 0.9201195910573006,
|
| 989 |
-
"num_tokens": 176611.0,
|
| 990 |
-
"step": 98
|
| 991 |
-
},
|
| 992 |
-
{
|
| 993 |
-
"entropy": 0.6942157626152039,
|
| 994 |
-
"epoch": 3.0,
|
| 995 |
-
"grad_norm": 11.0625,
|
| 996 |
-
"learning_rate": 1.0489428174020877e-08,
|
| 997 |
-
"loss": 0.1556,
|
| 998 |
-
"mean_token_accuracy": 0.9565272331237793,
|
| 999 |
-
"num_tokens": 177426.0,
|
| 1000 |
-
"step": 99
|
| 1001 |
}
|
| 1002 |
],
|
| 1003 |
"logging_steps": 1,
|
|
@@ -1012,12 +672,12 @@
|
|
| 1012 |
"should_evaluate": false,
|
| 1013 |
"should_log": false,
|
| 1014 |
"should_save": true,
|
| 1015 |
-
"should_training_stop":
|
| 1016 |
},
|
| 1017 |
"attributes": {}
|
| 1018 |
}
|
| 1019 |
},
|
| 1020 |
-
"total_flos":
|
| 1021 |
"train_batch_size": 2,
|
| 1022 |
"trial_name": null,
|
| 1023 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 1.9808429118773945,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 65,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 658 |
"mean_token_accuracy": 0.7686784416437149,
|
| 659 |
"num_tokens": 116981.0,
|
| 660 |
"step": 65
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 661 |
}
|
| 662 |
],
|
| 663 |
"logging_steps": 1,
|
|
|
|
| 672 |
"should_evaluate": false,
|
| 673 |
"should_log": false,
|
| 674 |
"should_save": true,
|
| 675 |
+
"should_training_stop": false
|
| 676 |
},
|
| 677 |
"attributes": {}
|
| 678 |
}
|
| 679 |
},
|
| 680 |
+
"total_flos": 3202052021059584.0,
|
| 681 |
"train_batch_size": 2,
|
| 682 |
"trial_name": null,
|
| 683 |
"trial_params": null
|