Training in progress, step 1500, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 737580392
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:314d749e4dd7abfe5f61b2561f4a91a44ca75ecd1eed4e91397af84284dbe186
|
| 3 |
size 737580392
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1475248442
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ef4dc31615294601109de6bdb9c8366f58c1c42b24bbdb22c50617985782c033
|
| 3 |
size 1475248442
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1000
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f991cde05bd4bf7e497599ae4e2cc6a082c7ed663e36ba15a2e932ed573a6a1f
|
| 3 |
size 1000
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 2.0,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -707,6 +707,356 @@
|
|
| 707 |
"learning_rate": 2.9414245548266168e-05,
|
| 708 |
"loss": 0.3148,
|
| 709 |
"step": 1000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 710 |
}
|
| 711 |
],
|
| 712 |
"logging_steps": 10,
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.7029053420805998,
|
| 5 |
"eval_steps": 2.0,
|
| 6 |
+
"global_step": 1500,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 707 |
"learning_rate": 2.9414245548266168e-05,
|
| 708 |
"loss": 0.3148,
|
| 709 |
"step": 1000
|
| 710 |
+
},
|
| 711 |
+
{
|
| 712 |
+
"epoch": 0.4732895970009372,
|
| 713 |
+
"grad_norm": 2.0385727882385254,
|
| 714 |
+
"learning_rate": 2.940838800374883e-05,
|
| 715 |
+
"loss": 0.3239,
|
| 716 |
+
"step": 1010
|
| 717 |
+
},
|
| 718 |
+
{
|
| 719 |
+
"epoch": 0.47797563261480785,
|
| 720 |
+
"grad_norm": 1.620071291923523,
|
| 721 |
+
"learning_rate": 2.940253045923149e-05,
|
| 722 |
+
"loss": 0.3402,
|
| 723 |
+
"step": 1020
|
| 724 |
+
},
|
| 725 |
+
{
|
| 726 |
+
"epoch": 0.48266166822867856,
|
| 727 |
+
"grad_norm": 1.8785263299942017,
|
| 728 |
+
"learning_rate": 2.9396672914714152e-05,
|
| 729 |
+
"loss": 0.3246,
|
| 730 |
+
"step": 1030
|
| 731 |
+
},
|
| 732 |
+
{
|
| 733 |
+
"epoch": 0.4873477038425492,
|
| 734 |
+
"grad_norm": 2.2061355113983154,
|
| 735 |
+
"learning_rate": 2.9390815370196814e-05,
|
| 736 |
+
"loss": 0.3742,
|
| 737 |
+
"step": 1040
|
| 738 |
+
},
|
| 739 |
+
{
|
| 740 |
+
"epoch": 0.49203373945641987,
|
| 741 |
+
"grad_norm": 1.86517333984375,
|
| 742 |
+
"learning_rate": 2.9384957825679477e-05,
|
| 743 |
+
"loss": 0.3132,
|
| 744 |
+
"step": 1050
|
| 745 |
+
},
|
| 746 |
+
{
|
| 747 |
+
"epoch": 0.4967197750702905,
|
| 748 |
+
"grad_norm": 1.9350093603134155,
|
| 749 |
+
"learning_rate": 2.9379100281162136e-05,
|
| 750 |
+
"loss": 0.3288,
|
| 751 |
+
"step": 1060
|
| 752 |
+
},
|
| 753 |
+
{
|
| 754 |
+
"epoch": 0.5014058106841612,
|
| 755 |
+
"grad_norm": 1.9156781435012817,
|
| 756 |
+
"learning_rate": 2.93732427366448e-05,
|
| 757 |
+
"loss": 0.3561,
|
| 758 |
+
"step": 1070
|
| 759 |
+
},
|
| 760 |
+
{
|
| 761 |
+
"epoch": 0.5060918462980318,
|
| 762 |
+
"grad_norm": 1.9902970790863037,
|
| 763 |
+
"learning_rate": 2.936738519212746e-05,
|
| 764 |
+
"loss": 0.3531,
|
| 765 |
+
"step": 1080
|
| 766 |
+
},
|
| 767 |
+
{
|
| 768 |
+
"epoch": 0.5107778819119025,
|
| 769 |
+
"grad_norm": 2.0289220809936523,
|
| 770 |
+
"learning_rate": 2.9361527647610123e-05,
|
| 771 |
+
"loss": 0.3589,
|
| 772 |
+
"step": 1090
|
| 773 |
+
},
|
| 774 |
+
{
|
| 775 |
+
"epoch": 0.5154639175257731,
|
| 776 |
+
"grad_norm": 2.2270498275756836,
|
| 777 |
+
"learning_rate": 2.9355670103092782e-05,
|
| 778 |
+
"loss": 0.3221,
|
| 779 |
+
"step": 1100
|
| 780 |
+
},
|
| 781 |
+
{
|
| 782 |
+
"epoch": 0.5201499531396439,
|
| 783 |
+
"grad_norm": 2.1600871086120605,
|
| 784 |
+
"learning_rate": 2.9349812558575448e-05,
|
| 785 |
+
"loss": 0.3466,
|
| 786 |
+
"step": 1110
|
| 787 |
+
},
|
| 788 |
+
{
|
| 789 |
+
"epoch": 0.5248359887535146,
|
| 790 |
+
"grad_norm": 1.8811109066009521,
|
| 791 |
+
"learning_rate": 2.9343955014058107e-05,
|
| 792 |
+
"loss": 0.3416,
|
| 793 |
+
"step": 1120
|
| 794 |
+
},
|
| 795 |
+
{
|
| 796 |
+
"epoch": 0.5295220243673852,
|
| 797 |
+
"grad_norm": 1.6011837720870972,
|
| 798 |
+
"learning_rate": 2.933809746954077e-05,
|
| 799 |
+
"loss": 0.3707,
|
| 800 |
+
"step": 1130
|
| 801 |
+
},
|
| 802 |
+
{
|
| 803 |
+
"epoch": 0.5342080599812559,
|
| 804 |
+
"grad_norm": 1.9934078454971313,
|
| 805 |
+
"learning_rate": 2.9332239925023428e-05,
|
| 806 |
+
"loss": 0.3954,
|
| 807 |
+
"step": 1140
|
| 808 |
+
},
|
| 809 |
+
{
|
| 810 |
+
"epoch": 0.5388940955951266,
|
| 811 |
+
"grad_norm": 1.530349612236023,
|
| 812 |
+
"learning_rate": 2.9326382380506094e-05,
|
| 813 |
+
"loss": 0.3301,
|
| 814 |
+
"step": 1150
|
| 815 |
+
},
|
| 816 |
+
{
|
| 817 |
+
"epoch": 0.5435801312089972,
|
| 818 |
+
"grad_norm": 1.9134522676467896,
|
| 819 |
+
"learning_rate": 2.9320524835988753e-05,
|
| 820 |
+
"loss": 0.3133,
|
| 821 |
+
"step": 1160
|
| 822 |
+
},
|
| 823 |
+
{
|
| 824 |
+
"epoch": 0.5482661668228679,
|
| 825 |
+
"grad_norm": 2.5994045734405518,
|
| 826 |
+
"learning_rate": 2.9314667291471415e-05,
|
| 827 |
+
"loss": 0.3366,
|
| 828 |
+
"step": 1170
|
| 829 |
+
},
|
| 830 |
+
{
|
| 831 |
+
"epoch": 0.5529522024367385,
|
| 832 |
+
"grad_norm": 1.6373965740203857,
|
| 833 |
+
"learning_rate": 2.9308809746954078e-05,
|
| 834 |
+
"loss": 0.3447,
|
| 835 |
+
"step": 1180
|
| 836 |
+
},
|
| 837 |
+
{
|
| 838 |
+
"epoch": 0.5576382380506092,
|
| 839 |
+
"grad_norm": 1.8884459733963013,
|
| 840 |
+
"learning_rate": 2.930295220243674e-05,
|
| 841 |
+
"loss": 0.3201,
|
| 842 |
+
"step": 1190
|
| 843 |
+
},
|
| 844 |
+
{
|
| 845 |
+
"epoch": 0.5623242736644799,
|
| 846 |
+
"grad_norm": 2.0964128971099854,
|
| 847 |
+
"learning_rate": 2.9297094657919402e-05,
|
| 848 |
+
"loss": 0.3528,
|
| 849 |
+
"step": 1200
|
| 850 |
+
},
|
| 851 |
+
{
|
| 852 |
+
"epoch": 0.5670103092783505,
|
| 853 |
+
"grad_norm": 1.8909735679626465,
|
| 854 |
+
"learning_rate": 2.929123711340206e-05,
|
| 855 |
+
"loss": 0.3208,
|
| 856 |
+
"step": 1210
|
| 857 |
+
},
|
| 858 |
+
{
|
| 859 |
+
"epoch": 0.5716963448922212,
|
| 860 |
+
"grad_norm": 1.6073822975158691,
|
| 861 |
+
"learning_rate": 2.9285379568884727e-05,
|
| 862 |
+
"loss": 0.3231,
|
| 863 |
+
"step": 1220
|
| 864 |
+
},
|
| 865 |
+
{
|
| 866 |
+
"epoch": 0.5763823805060918,
|
| 867 |
+
"grad_norm": 1.852653980255127,
|
| 868 |
+
"learning_rate": 2.9279522024367386e-05,
|
| 869 |
+
"loss": 0.331,
|
| 870 |
+
"step": 1230
|
| 871 |
+
},
|
| 872 |
+
{
|
| 873 |
+
"epoch": 0.5810684161199625,
|
| 874 |
+
"grad_norm": 1.4911562204360962,
|
| 875 |
+
"learning_rate": 2.927366447985005e-05,
|
| 876 |
+
"loss": 0.2986,
|
| 877 |
+
"step": 1240
|
| 878 |
+
},
|
| 879 |
+
{
|
| 880 |
+
"epoch": 0.5857544517338332,
|
| 881 |
+
"grad_norm": 1.87758207321167,
|
| 882 |
+
"learning_rate": 2.9267806935332708e-05,
|
| 883 |
+
"loss": 0.3348,
|
| 884 |
+
"step": 1250
|
| 885 |
+
},
|
| 886 |
+
{
|
| 887 |
+
"epoch": 0.5904404873477038,
|
| 888 |
+
"grad_norm": 2.20400071144104,
|
| 889 |
+
"learning_rate": 2.9261949390815373e-05,
|
| 890 |
+
"loss": 0.3578,
|
| 891 |
+
"step": 1260
|
| 892 |
+
},
|
| 893 |
+
{
|
| 894 |
+
"epoch": 0.5951265229615745,
|
| 895 |
+
"grad_norm": 2.1372976303100586,
|
| 896 |
+
"learning_rate": 2.9256091846298032e-05,
|
| 897 |
+
"loss": 0.3513,
|
| 898 |
+
"step": 1270
|
| 899 |
+
},
|
| 900 |
+
{
|
| 901 |
+
"epoch": 0.5998125585754451,
|
| 902 |
+
"grad_norm": 1.7900727987289429,
|
| 903 |
+
"learning_rate": 2.9250234301780695e-05,
|
| 904 |
+
"loss": 0.3022,
|
| 905 |
+
"step": 1280
|
| 906 |
+
},
|
| 907 |
+
{
|
| 908 |
+
"epoch": 0.6044985941893158,
|
| 909 |
+
"grad_norm": 1.2783315181732178,
|
| 910 |
+
"learning_rate": 2.9244376757263354e-05,
|
| 911 |
+
"loss": 0.3005,
|
| 912 |
+
"step": 1290
|
| 913 |
+
},
|
| 914 |
+
{
|
| 915 |
+
"epoch": 0.6091846298031866,
|
| 916 |
+
"grad_norm": 1.9415411949157715,
|
| 917 |
+
"learning_rate": 2.923851921274602e-05,
|
| 918 |
+
"loss": 0.2902,
|
| 919 |
+
"step": 1300
|
| 920 |
+
},
|
| 921 |
+
{
|
| 922 |
+
"epoch": 0.6138706654170571,
|
| 923 |
+
"grad_norm": 1.2527676820755005,
|
| 924 |
+
"learning_rate": 2.923266166822868e-05,
|
| 925 |
+
"loss": 0.3494,
|
| 926 |
+
"step": 1310
|
| 927 |
+
},
|
| 928 |
+
{
|
| 929 |
+
"epoch": 0.6185567010309279,
|
| 930 |
+
"grad_norm": 2.1499555110931396,
|
| 931 |
+
"learning_rate": 2.922680412371134e-05,
|
| 932 |
+
"loss": 0.3124,
|
| 933 |
+
"step": 1320
|
| 934 |
+
},
|
| 935 |
+
{
|
| 936 |
+
"epoch": 0.6232427366447985,
|
| 937 |
+
"grad_norm": 1.3738154172897339,
|
| 938 |
+
"learning_rate": 2.9220946579194e-05,
|
| 939 |
+
"loss": 0.3146,
|
| 940 |
+
"step": 1330
|
| 941 |
+
},
|
| 942 |
+
{
|
| 943 |
+
"epoch": 0.6279287722586692,
|
| 944 |
+
"grad_norm": 1.5831377506256104,
|
| 945 |
+
"learning_rate": 2.9215089034676666e-05,
|
| 946 |
+
"loss": 0.3231,
|
| 947 |
+
"step": 1340
|
| 948 |
+
},
|
| 949 |
+
{
|
| 950 |
+
"epoch": 0.6326148078725399,
|
| 951 |
+
"grad_norm": 2.8547163009643555,
|
| 952 |
+
"learning_rate": 2.9209231490159325e-05,
|
| 953 |
+
"loss": 0.3311,
|
| 954 |
+
"step": 1350
|
| 955 |
+
},
|
| 956 |
+
{
|
| 957 |
+
"epoch": 0.6373008434864105,
|
| 958 |
+
"grad_norm": 1.6369024515151978,
|
| 959 |
+
"learning_rate": 2.9203373945641987e-05,
|
| 960 |
+
"loss": 0.2868,
|
| 961 |
+
"step": 1360
|
| 962 |
+
},
|
| 963 |
+
{
|
| 964 |
+
"epoch": 0.6419868791002812,
|
| 965 |
+
"grad_norm": 1.599731206893921,
|
| 966 |
+
"learning_rate": 2.919751640112465e-05,
|
| 967 |
+
"loss": 0.322,
|
| 968 |
+
"step": 1370
|
| 969 |
+
},
|
| 970 |
+
{
|
| 971 |
+
"epoch": 0.6466729147141518,
|
| 972 |
+
"grad_norm": 1.878670573234558,
|
| 973 |
+
"learning_rate": 2.9191658856607312e-05,
|
| 974 |
+
"loss": 0.3201,
|
| 975 |
+
"step": 1380
|
| 976 |
+
},
|
| 977 |
+
{
|
| 978 |
+
"epoch": 0.6513589503280225,
|
| 979 |
+
"grad_norm": 2.1555798053741455,
|
| 980 |
+
"learning_rate": 2.918580131208997e-05,
|
| 981 |
+
"loss": 0.3317,
|
| 982 |
+
"step": 1390
|
| 983 |
+
},
|
| 984 |
+
{
|
| 985 |
+
"epoch": 0.6560449859418932,
|
| 986 |
+
"grad_norm": 2.735520124435425,
|
| 987 |
+
"learning_rate": 2.9179943767572633e-05,
|
| 988 |
+
"loss": 0.2862,
|
| 989 |
+
"step": 1400
|
| 990 |
+
},
|
| 991 |
+
{
|
| 992 |
+
"epoch": 0.6607310215557638,
|
| 993 |
+
"grad_norm": 1.7845630645751953,
|
| 994 |
+
"learning_rate": 2.91740862230553e-05,
|
| 995 |
+
"loss": 0.3107,
|
| 996 |
+
"step": 1410
|
| 997 |
+
},
|
| 998 |
+
{
|
| 999 |
+
"epoch": 0.6654170571696345,
|
| 1000 |
+
"grad_norm": 1.9131550788879395,
|
| 1001 |
+
"learning_rate": 2.9168228678537958e-05,
|
| 1002 |
+
"loss": 0.3135,
|
| 1003 |
+
"step": 1420
|
| 1004 |
+
},
|
| 1005 |
+
{
|
| 1006 |
+
"epoch": 0.6701030927835051,
|
| 1007 |
+
"grad_norm": 1.7999134063720703,
|
| 1008 |
+
"learning_rate": 2.916237113402062e-05,
|
| 1009 |
+
"loss": 0.3393,
|
| 1010 |
+
"step": 1430
|
| 1011 |
+
},
|
| 1012 |
+
{
|
| 1013 |
+
"epoch": 0.6747891283973758,
|
| 1014 |
+
"grad_norm": 1.856102466583252,
|
| 1015 |
+
"learning_rate": 2.915651358950328e-05,
|
| 1016 |
+
"loss": 0.3394,
|
| 1017 |
+
"step": 1440
|
| 1018 |
+
},
|
| 1019 |
+
{
|
| 1020 |
+
"epoch": 0.6794751640112465,
|
| 1021 |
+
"grad_norm": 1.4492303133010864,
|
| 1022 |
+
"learning_rate": 2.9150656044985945e-05,
|
| 1023 |
+
"loss": 0.2909,
|
| 1024 |
+
"step": 1450
|
| 1025 |
+
},
|
| 1026 |
+
{
|
| 1027 |
+
"epoch": 0.6841611996251171,
|
| 1028 |
+
"grad_norm": 1.517831563949585,
|
| 1029 |
+
"learning_rate": 2.9144798500468604e-05,
|
| 1030 |
+
"loss": 0.2919,
|
| 1031 |
+
"step": 1460
|
| 1032 |
+
},
|
| 1033 |
+
{
|
| 1034 |
+
"epoch": 0.6888472352389878,
|
| 1035 |
+
"grad_norm": 1.8984867334365845,
|
| 1036 |
+
"learning_rate": 2.9138940955951267e-05,
|
| 1037 |
+
"loss": 0.3282,
|
| 1038 |
+
"step": 1470
|
| 1039 |
+
},
|
| 1040 |
+
{
|
| 1041 |
+
"epoch": 0.6935332708528584,
|
| 1042 |
+
"grad_norm": 2.1415510177612305,
|
| 1043 |
+
"learning_rate": 2.9133083411433926e-05,
|
| 1044 |
+
"loss": 0.3159,
|
| 1045 |
+
"step": 1480
|
| 1046 |
+
},
|
| 1047 |
+
{
|
| 1048 |
+
"epoch": 0.6982193064667291,
|
| 1049 |
+
"grad_norm": 1.7007821798324585,
|
| 1050 |
+
"learning_rate": 2.912722586691659e-05,
|
| 1051 |
+
"loss": 0.2854,
|
| 1052 |
+
"step": 1490
|
| 1053 |
+
},
|
| 1054 |
+
{
|
| 1055 |
+
"epoch": 0.7029053420805998,
|
| 1056 |
+
"grad_norm": 1.7445827722549438,
|
| 1057 |
+
"learning_rate": 2.912136832239925e-05,
|
| 1058 |
+
"loss": 0.3308,
|
| 1059 |
+
"step": 1500
|
| 1060 |
}
|
| 1061 |
],
|
| 1062 |
"logging_steps": 10,
|