Training in progress, step 1500, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4523108832
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6696d4f268a5241495ceaff2acea183efcc3afd5b44955ab5f6c2b91adbea6b9
|
| 3 |
size 4523108832
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:363b73880c7a938f5b75d71760d551c6d014704f2a4ec6628c9aaa6f429fa21e
|
| 3 |
+
size 2912179275
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:01f9a0f7843a37be87edd23f4e88aa93b38b95cc2c07503eeb1cf2e4632453a2
|
| 3 |
size 14645
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1383
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ca372268f4fa9335030c0cb7aedb6cdba75f457da50e7a4034abb1a2d0843689
|
| 3 |
size 1383
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0717d7b217f15f3f50778aac3433a6ba54621949761b93d00f3416efeb959875
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -708,6 +708,356 @@
|
|
| 708 |
"learning_rate": 0.00018002,
|
| 709 |
"loss": 1.862677001953125,
|
| 710 |
"step": 1000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 711 |
}
|
| 712 |
],
|
| 713 |
"logging_steps": 10,
|
|
@@ -727,7 +1077,7 @@
|
|
| 727 |
"attributes": {}
|
| 728 |
}
|
| 729 |
},
|
| 730 |
-
"total_flos":
|
| 731 |
"train_batch_size": 1,
|
| 732 |
"trial_name": null,
|
| 733 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 2.0188,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 1500,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 708 |
"learning_rate": 0.00018002,
|
| 709 |
"loss": 1.862677001953125,
|
| 710 |
"step": 1000
|
| 711 |
+
},
|
| 712 |
+
{
|
| 713 |
+
"epoch": 1.001,
|
| 714 |
+
"grad_norm": 0.7271579504013062,
|
| 715 |
+
"learning_rate": 0.00017982000000000002,
|
| 716 |
+
"loss": 1.4198949813842774,
|
| 717 |
+
"step": 1010
|
| 718 |
+
},
|
| 719 |
+
{
|
| 720 |
+
"epoch": 1.002,
|
| 721 |
+
"grad_norm": 0.7241926789283752,
|
| 722 |
+
"learning_rate": 0.00017962000000000002,
|
| 723 |
+
"loss": 1.290165328979492,
|
| 724 |
+
"step": 1020
|
| 725 |
+
},
|
| 726 |
+
{
|
| 727 |
+
"epoch": 1.003,
|
| 728 |
+
"grad_norm": 0.6930139064788818,
|
| 729 |
+
"learning_rate": 0.00017942,
|
| 730 |
+
"loss": 1.2588088989257813,
|
| 731 |
+
"step": 1030
|
| 732 |
+
},
|
| 733 |
+
{
|
| 734 |
+
"epoch": 1.004,
|
| 735 |
+
"grad_norm": 0.7046691179275513,
|
| 736 |
+
"learning_rate": 0.00017922,
|
| 737 |
+
"loss": 1.2508729934692382,
|
| 738 |
+
"step": 1040
|
| 739 |
+
},
|
| 740 |
+
{
|
| 741 |
+
"epoch": 1.005,
|
| 742 |
+
"grad_norm": 0.7425150275230408,
|
| 743 |
+
"learning_rate": 0.00017902,
|
| 744 |
+
"loss": 1.2099505424499513,
|
| 745 |
+
"step": 1050
|
| 746 |
+
},
|
| 747 |
+
{
|
| 748 |
+
"epoch": 1.006,
|
| 749 |
+
"grad_norm": 0.6843275427818298,
|
| 750 |
+
"learning_rate": 0.00017882,
|
| 751 |
+
"loss": 1.1226897239685059,
|
| 752 |
+
"step": 1060
|
| 753 |
+
},
|
| 754 |
+
{
|
| 755 |
+
"epoch": 1.007,
|
| 756 |
+
"grad_norm": 0.6979613900184631,
|
| 757 |
+
"learning_rate": 0.00017862000000000002,
|
| 758 |
+
"loss": 1.6102104187011719,
|
| 759 |
+
"step": 1070
|
| 760 |
+
},
|
| 761 |
+
{
|
| 762 |
+
"epoch": 1.008,
|
| 763 |
+
"grad_norm": 0.6627645492553711,
|
| 764 |
+
"learning_rate": 0.00017842000000000002,
|
| 765 |
+
"loss": 1.9422037124633789,
|
| 766 |
+
"step": 1080
|
| 767 |
+
},
|
| 768 |
+
{
|
| 769 |
+
"epoch": 1.009,
|
| 770 |
+
"grad_norm": 0.6664915084838867,
|
| 771 |
+
"learning_rate": 0.00017822,
|
| 772 |
+
"loss": 1.8065723419189452,
|
| 773 |
+
"step": 1090
|
| 774 |
+
},
|
| 775 |
+
{
|
| 776 |
+
"epoch": 1.01,
|
| 777 |
+
"grad_norm": 0.6870133280754089,
|
| 778 |
+
"learning_rate": 0.00017802,
|
| 779 |
+
"loss": 1.8182893753051759,
|
| 780 |
+
"step": 1100
|
| 781 |
+
},
|
| 782 |
+
{
|
| 783 |
+
"epoch": 1.011,
|
| 784 |
+
"grad_norm": 0.6618905663490295,
|
| 785 |
+
"learning_rate": 0.00017782,
|
| 786 |
+
"loss": 1.814337158203125,
|
| 787 |
+
"step": 1110
|
| 788 |
+
},
|
| 789 |
+
{
|
| 790 |
+
"epoch": 1.012,
|
| 791 |
+
"grad_norm": 1.1367806196212769,
|
| 792 |
+
"learning_rate": 0.00017762,
|
| 793 |
+
"loss": 1.73431453704834,
|
| 794 |
+
"step": 1120
|
| 795 |
+
},
|
| 796 |
+
{
|
| 797 |
+
"epoch": 1.013,
|
| 798 |
+
"grad_norm": 0.685649573802948,
|
| 799 |
+
"learning_rate": 0.00017742000000000002,
|
| 800 |
+
"loss": 1.7108922958374024,
|
| 801 |
+
"step": 1130
|
| 802 |
+
},
|
| 803 |
+
{
|
| 804 |
+
"epoch": 1.014,
|
| 805 |
+
"grad_norm": 0.6594149470329285,
|
| 806 |
+
"learning_rate": 0.00017722000000000001,
|
| 807 |
+
"loss": 1.779058074951172,
|
| 808 |
+
"step": 1140
|
| 809 |
+
},
|
| 810 |
+
{
|
| 811 |
+
"epoch": 1.015,
|
| 812 |
+
"grad_norm": 0.6914166212081909,
|
| 813 |
+
"learning_rate": 0.00017702,
|
| 814 |
+
"loss": 1.7222532272338866,
|
| 815 |
+
"step": 1150
|
| 816 |
+
},
|
| 817 |
+
{
|
| 818 |
+
"epoch": 1.016,
|
| 819 |
+
"grad_norm": 0.6719418168067932,
|
| 820 |
+
"learning_rate": 0.00017682,
|
| 821 |
+
"loss": 1.730402946472168,
|
| 822 |
+
"step": 1160
|
| 823 |
+
},
|
| 824 |
+
{
|
| 825 |
+
"epoch": 1.017,
|
| 826 |
+
"grad_norm": 0.7353265881538391,
|
| 827 |
+
"learning_rate": 0.00017662,
|
| 828 |
+
"loss": 1.711669921875,
|
| 829 |
+
"step": 1170
|
| 830 |
+
},
|
| 831 |
+
{
|
| 832 |
+
"epoch": 1.018,
|
| 833 |
+
"grad_norm": 0.6861590147018433,
|
| 834 |
+
"learning_rate": 0.00017642,
|
| 835 |
+
"loss": 1.7661975860595702,
|
| 836 |
+
"step": 1180
|
| 837 |
+
},
|
| 838 |
+
{
|
| 839 |
+
"epoch": 1.019,
|
| 840 |
+
"grad_norm": 0.6513635516166687,
|
| 841 |
+
"learning_rate": 0.00017622000000000002,
|
| 842 |
+
"loss": 1.6878995895385742,
|
| 843 |
+
"step": 1190
|
| 844 |
+
},
|
| 845 |
+
{
|
| 846 |
+
"epoch": 1.02,
|
| 847 |
+
"grad_norm": 0.6829492449760437,
|
| 848 |
+
"learning_rate": 0.00017602,
|
| 849 |
+
"loss": 1.6560598373413087,
|
| 850 |
+
"step": 1200
|
| 851 |
+
},
|
| 852 |
+
{
|
| 853 |
+
"epoch": 1.021,
|
| 854 |
+
"grad_norm": 0.6806496381759644,
|
| 855 |
+
"learning_rate": 0.00017582,
|
| 856 |
+
"loss": 1.6022390365600585,
|
| 857 |
+
"step": 1210
|
| 858 |
+
},
|
| 859 |
+
{
|
| 860 |
+
"epoch": 1.022,
|
| 861 |
+
"grad_norm": 0.6467958092689514,
|
| 862 |
+
"learning_rate": 0.00017562,
|
| 863 |
+
"loss": 1.527933406829834,
|
| 864 |
+
"step": 1220
|
| 865 |
+
},
|
| 866 |
+
{
|
| 867 |
+
"epoch": 1.023,
|
| 868 |
+
"grad_norm": 0.7558693289756775,
|
| 869 |
+
"learning_rate": 0.00017542,
|
| 870 |
+
"loss": 1.6221937179565429,
|
| 871 |
+
"step": 1230
|
| 872 |
+
},
|
| 873 |
+
{
|
| 874 |
+
"epoch": 1.024,
|
| 875 |
+
"grad_norm": 0.7131749391555786,
|
| 876 |
+
"learning_rate": 0.00017522000000000002,
|
| 877 |
+
"loss": 1.5218177795410157,
|
| 878 |
+
"step": 1240
|
| 879 |
+
},
|
| 880 |
+
{
|
| 881 |
+
"epoch": 1.025,
|
| 882 |
+
"grad_norm": 0.7794132828712463,
|
| 883 |
+
"learning_rate": 0.00017502000000000001,
|
| 884 |
+
"loss": 1.5550528526306153,
|
| 885 |
+
"step": 1250
|
| 886 |
+
},
|
| 887 |
+
{
|
| 888 |
+
"epoch": 1.026,
|
| 889 |
+
"grad_norm": 0.7174275517463684,
|
| 890 |
+
"learning_rate": 0.00017482,
|
| 891 |
+
"loss": 1.5929729461669921,
|
| 892 |
+
"step": 1260
|
| 893 |
+
},
|
| 894 |
+
{
|
| 895 |
+
"epoch": 1.027,
|
| 896 |
+
"grad_norm": 0.7710967659950256,
|
| 897 |
+
"learning_rate": 0.00017462,
|
| 898 |
+
"loss": 1.4883572578430175,
|
| 899 |
+
"step": 1270
|
| 900 |
+
},
|
| 901 |
+
{
|
| 902 |
+
"epoch": 1.028,
|
| 903 |
+
"grad_norm": 0.689930260181427,
|
| 904 |
+
"learning_rate": 0.00017442,
|
| 905 |
+
"loss": 1.4959463119506835,
|
| 906 |
+
"step": 1280
|
| 907 |
+
},
|
| 908 |
+
{
|
| 909 |
+
"epoch": 1.029,
|
| 910 |
+
"grad_norm": 0.7309102416038513,
|
| 911 |
+
"learning_rate": 0.00017422,
|
| 912 |
+
"loss": 1.5185231208801269,
|
| 913 |
+
"step": 1290
|
| 914 |
+
},
|
| 915 |
+
{
|
| 916 |
+
"epoch": 1.03,
|
| 917 |
+
"grad_norm": 0.6255451440811157,
|
| 918 |
+
"learning_rate": 0.00017402000000000002,
|
| 919 |
+
"loss": 1.4664793968200684,
|
| 920 |
+
"step": 1300
|
| 921 |
+
},
|
| 922 |
+
{
|
| 923 |
+
"epoch": 1.031,
|
| 924 |
+
"grad_norm": 0.7244739532470703,
|
| 925 |
+
"learning_rate": 0.00017382,
|
| 926 |
+
"loss": 1.4731544494628905,
|
| 927 |
+
"step": 1310
|
| 928 |
+
},
|
| 929 |
+
{
|
| 930 |
+
"epoch": 2.0008,
|
| 931 |
+
"grad_norm": 0.7122154235839844,
|
| 932 |
+
"learning_rate": 0.00017362,
|
| 933 |
+
"loss": 1.3298330307006836,
|
| 934 |
+
"step": 1320
|
| 935 |
+
},
|
| 936 |
+
{
|
| 937 |
+
"epoch": 2.0018,
|
| 938 |
+
"grad_norm": 0.7280747294425964,
|
| 939 |
+
"learning_rate": 0.00017342,
|
| 940 |
+
"loss": 1.094059658050537,
|
| 941 |
+
"step": 1330
|
| 942 |
+
},
|
| 943 |
+
{
|
| 944 |
+
"epoch": 2.0028,
|
| 945 |
+
"grad_norm": 0.7428712844848633,
|
| 946 |
+
"learning_rate": 0.00017322,
|
| 947 |
+
"loss": 0.9706879615783691,
|
| 948 |
+
"step": 1340
|
| 949 |
+
},
|
| 950 |
+
{
|
| 951 |
+
"epoch": 2.0038,
|
| 952 |
+
"grad_norm": 0.759975790977478,
|
| 953 |
+
"learning_rate": 0.00017302,
|
| 954 |
+
"loss": 1.02620849609375,
|
| 955 |
+
"step": 1350
|
| 956 |
+
},
|
| 957 |
+
{
|
| 958 |
+
"epoch": 2.0048,
|
| 959 |
+
"grad_norm": 0.7684850692749023,
|
| 960 |
+
"learning_rate": 0.00017282000000000002,
|
| 961 |
+
"loss": 0.9361392974853515,
|
| 962 |
+
"step": 1360
|
| 963 |
+
},
|
| 964 |
+
{
|
| 965 |
+
"epoch": 2.0058,
|
| 966 |
+
"grad_norm": 0.7847408652305603,
|
| 967 |
+
"learning_rate": 0.00017262,
|
| 968 |
+
"loss": 0.8885183334350586,
|
| 969 |
+
"step": 1370
|
| 970 |
+
},
|
| 971 |
+
{
|
| 972 |
+
"epoch": 2.0068,
|
| 973 |
+
"grad_norm": 0.921360194683075,
|
| 974 |
+
"learning_rate": 0.00017242,
|
| 975 |
+
"loss": 1.0311158180236817,
|
| 976 |
+
"step": 1380
|
| 977 |
+
},
|
| 978 |
+
{
|
| 979 |
+
"epoch": 2.0078,
|
| 980 |
+
"grad_norm": 0.6837793588638306,
|
| 981 |
+
"learning_rate": 0.00017222,
|
| 982 |
+
"loss": 1.2890826225280763,
|
| 983 |
+
"step": 1390
|
| 984 |
+
},
|
| 985 |
+
{
|
| 986 |
+
"epoch": 2.0088,
|
| 987 |
+
"grad_norm": 0.7072200775146484,
|
| 988 |
+
"learning_rate": 0.00017202,
|
| 989 |
+
"loss": 1.209097957611084,
|
| 990 |
+
"step": 1400
|
| 991 |
+
},
|
| 992 |
+
{
|
| 993 |
+
"epoch": 2.0098,
|
| 994 |
+
"grad_norm": 0.6607412695884705,
|
| 995 |
+
"learning_rate": 0.00017182,
|
| 996 |
+
"loss": 1.2470938682556152,
|
| 997 |
+
"step": 1410
|
| 998 |
+
},
|
| 999 |
+
{
|
| 1000 |
+
"epoch": 2.0108,
|
| 1001 |
+
"grad_norm": 0.7476115226745605,
|
| 1002 |
+
"learning_rate": 0.00017162000000000001,
|
| 1003 |
+
"loss": 1.2279239654541017,
|
| 1004 |
+
"step": 1420
|
| 1005 |
+
},
|
| 1006 |
+
{
|
| 1007 |
+
"epoch": 2.0118,
|
| 1008 |
+
"grad_norm": 0.7690820693969727,
|
| 1009 |
+
"learning_rate": 0.00017142,
|
| 1010 |
+
"loss": 1.1757261276245117,
|
| 1011 |
+
"step": 1430
|
| 1012 |
+
},
|
| 1013 |
+
{
|
| 1014 |
+
"epoch": 2.0128,
|
| 1015 |
+
"grad_norm": 0.7281740307807922,
|
| 1016 |
+
"learning_rate": 0.00017122,
|
| 1017 |
+
"loss": 1.158098030090332,
|
| 1018 |
+
"step": 1440
|
| 1019 |
+
},
|
| 1020 |
+
{
|
| 1021 |
+
"epoch": 2.0138,
|
| 1022 |
+
"grad_norm": 0.7036393880844116,
|
| 1023 |
+
"learning_rate": 0.00017102,
|
| 1024 |
+
"loss": 1.1959182739257812,
|
| 1025 |
+
"step": 1450
|
| 1026 |
+
},
|
| 1027 |
+
{
|
| 1028 |
+
"epoch": 2.0148,
|
| 1029 |
+
"grad_norm": 0.7431781888008118,
|
| 1030 |
+
"learning_rate": 0.00017082,
|
| 1031 |
+
"loss": 1.189777946472168,
|
| 1032 |
+
"step": 1460
|
| 1033 |
+
},
|
| 1034 |
+
{
|
| 1035 |
+
"epoch": 2.0158,
|
| 1036 |
+
"grad_norm": 0.7411831021308899,
|
| 1037 |
+
"learning_rate": 0.00017062,
|
| 1038 |
+
"loss": 1.2544533729553222,
|
| 1039 |
+
"step": 1470
|
| 1040 |
+
},
|
| 1041 |
+
{
|
| 1042 |
+
"epoch": 2.0168,
|
| 1043 |
+
"grad_norm": 0.7163280844688416,
|
| 1044 |
+
"learning_rate": 0.00017042,
|
| 1045 |
+
"loss": 1.165062141418457,
|
| 1046 |
+
"step": 1480
|
| 1047 |
+
},
|
| 1048 |
+
{
|
| 1049 |
+
"epoch": 2.0178,
|
| 1050 |
+
"grad_norm": 0.7118193507194519,
|
| 1051 |
+
"learning_rate": 0.00017022,
|
| 1052 |
+
"loss": 1.2281935691833497,
|
| 1053 |
+
"step": 1490
|
| 1054 |
+
},
|
| 1055 |
+
{
|
| 1056 |
+
"epoch": 2.0188,
|
| 1057 |
+
"grad_norm": 0.6703944206237793,
|
| 1058 |
+
"learning_rate": 0.00017002,
|
| 1059 |
+
"loss": 1.1760727882385253,
|
| 1060 |
+
"step": 1500
|
| 1061 |
}
|
| 1062 |
],
|
| 1063 |
"logging_steps": 10,
|
|
|
|
| 1077 |
"attributes": {}
|
| 1078 |
}
|
| 1079 |
},
|
| 1080 |
+
"total_flos": 1.2700397305014682e+17,
|
| 1081 |
"train_batch_size": 1,
|
| 1082 |
"trial_name": null,
|
| 1083 |
"trial_params": null
|
last-checkpoint/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5329
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8a906813357c502f71d3ca10d7a5748ab64a30407afe18ed781d8ce5e0a1e7e1
|
| 3 |
size 5329
|