Training in progress, step 10000, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 328277848
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:318c2656039c95a58242e4619aba90de89d286abfdd50c932ac46a5bbc6d6b36
|
| 3 |
size 328277848
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 318646859
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7fdbed07e432554d329c7e8d5c0f65220a1bfeee29ae26fa92a6aa0d5901ae56
|
| 3 |
size 318646859
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5948a5161f7923aa0acf66b01adf35dc2196a8acf5bd2c21227561e5bff45666
|
| 3 |
size 14645
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:53471871a37f3cc35b4a656a6f0cfda18046c304a91d9bf8b29b14eea2ccc156
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 1.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -6817,6 +6817,364 @@
|
|
| 6817 |
"eval_samples_per_second": 279.306,
|
| 6818 |
"eval_steps_per_second": 5.865,
|
| 6819 |
"step": 9500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6820 |
}
|
| 6821 |
],
|
| 6822 |
"logging_steps": 10,
|
|
@@ -6836,7 +7194,7 @@
|
|
| 6836 |
"attributes": {}
|
| 6837 |
}
|
| 6838 |
},
|
| 6839 |
-
"total_flos": 3.
|
| 6840 |
"train_batch_size": 48,
|
| 6841 |
"trial_name": null,
|
| 6842 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 1.6894745734076704,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 10000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 6817 |
"eval_samples_per_second": 279.306,
|
| 6818 |
"eval_steps_per_second": 5.865,
|
| 6819 |
"step": 9500
|
| 6820 |
+
},
|
| 6821 |
+
{
|
| 6822 |
+
"epoch": 1.6066903193106943,
|
| 6823 |
+
"grad_norm": 0.4607177972793579,
|
| 6824 |
+
"learning_rate": 3.960727047894527e-05,
|
| 6825 |
+
"loss": 4.358008575439453,
|
| 6826 |
+
"step": 9510
|
| 6827 |
+
},
|
| 6828 |
+
{
|
| 6829 |
+
"epoch": 1.608379793884102,
|
| 6830 |
+
"grad_norm": 0.49898746609687805,
|
| 6831 |
+
"learning_rate": 3.928353538569023e-05,
|
| 6832 |
+
"loss": 4.323298645019531,
|
| 6833 |
+
"step": 9520
|
| 6834 |
+
},
|
| 6835 |
+
{
|
| 6836 |
+
"epoch": 1.6100692684575098,
|
| 6837 |
+
"grad_norm": 0.4633605182170868,
|
| 6838 |
+
"learning_rate": 3.8960929302853074e-05,
|
| 6839 |
+
"loss": 4.317881393432617,
|
| 6840 |
+
"step": 9530
|
| 6841 |
+
},
|
| 6842 |
+
{
|
| 6843 |
+
"epoch": 1.6117587430309173,
|
| 6844 |
+
"grad_norm": 0.461166650056839,
|
| 6845 |
+
"learning_rate": 3.863945552014892e-05,
|
| 6846 |
+
"loss": 4.31908073425293,
|
| 6847 |
+
"step": 9540
|
| 6848 |
+
},
|
| 6849 |
+
{
|
| 6850 |
+
"epoch": 1.6134482176043252,
|
| 6851 |
+
"grad_norm": 0.46390029788017273,
|
| 6852 |
+
"learning_rate": 3.831911731574648e-05,
|
| 6853 |
+
"loss": 4.363689804077149,
|
| 6854 |
+
"step": 9550
|
| 6855 |
+
},
|
| 6856 |
+
{
|
| 6857 |
+
"epoch": 1.6151376921777327,
|
| 6858 |
+
"grad_norm": 0.47450077533721924,
|
| 6859 |
+
"learning_rate": 3.799991795623471e-05,
|
| 6860 |
+
"loss": 4.329352569580078,
|
| 6861 |
+
"step": 9560
|
| 6862 |
+
},
|
| 6863 |
+
{
|
| 6864 |
+
"epoch": 1.6168271667511402,
|
| 6865 |
+
"grad_norm": 0.4686853587627411,
|
| 6866 |
+
"learning_rate": 3.7681860696589216e-05,
|
| 6867 |
+
"loss": 4.3315582275390625,
|
| 6868 |
+
"step": 9570
|
| 6869 |
+
},
|
| 6870 |
+
{
|
| 6871 |
+
"epoch": 1.6185166413245482,
|
| 6872 |
+
"grad_norm": 0.4681236445903778,
|
| 6873 |
+
"learning_rate": 3.7364948780139344e-05,
|
| 6874 |
+
"loss": 4.294339752197265,
|
| 6875 |
+
"step": 9580
|
| 6876 |
+
},
|
| 6877 |
+
{
|
| 6878 |
+
"epoch": 1.6202061158979557,
|
| 6879 |
+
"grad_norm": 0.47375062108039856,
|
| 6880 |
+
"learning_rate": 3.70491854385351e-05,
|
| 6881 |
+
"loss": 4.285346984863281,
|
| 6882 |
+
"step": 9590
|
| 6883 |
+
},
|
| 6884 |
+
{
|
| 6885 |
+
"epoch": 1.6218955904713634,
|
| 6886 |
+
"grad_norm": 0.4612501859664917,
|
| 6887 |
+
"learning_rate": 3.673457389171401e-05,
|
| 6888 |
+
"loss": 4.301979446411133,
|
| 6889 |
+
"step": 9600
|
| 6890 |
+
},
|
| 6891 |
+
{
|
| 6892 |
+
"epoch": 1.6235850650447712,
|
| 6893 |
+
"grad_norm": 0.4734920561313629,
|
| 6894 |
+
"learning_rate": 3.642111734786833e-05,
|
| 6895 |
+
"loss": 4.337078094482422,
|
| 6896 |
+
"step": 9610
|
| 6897 |
+
},
|
| 6898 |
+
{
|
| 6899 |
+
"epoch": 1.6252745396181787,
|
| 6900 |
+
"grad_norm": 0.48585888743400574,
|
| 6901 |
+
"learning_rate": 3.610881900341261e-05,
|
| 6902 |
+
"loss": 4.291253280639649,
|
| 6903 |
+
"step": 9620
|
| 6904 |
+
},
|
| 6905 |
+
{
|
| 6906 |
+
"epoch": 1.6269640141915864,
|
| 6907 |
+
"grad_norm": 0.4632498323917389,
|
| 6908 |
+
"learning_rate": 3.579768204295063e-05,
|
| 6909 |
+
"loss": 4.331230545043946,
|
| 6910 |
+
"step": 9630
|
| 6911 |
+
},
|
| 6912 |
+
{
|
| 6913 |
+
"epoch": 1.6286534887649942,
|
| 6914 |
+
"grad_norm": 0.46583032608032227,
|
| 6915 |
+
"learning_rate": 3.54877096392434e-05,
|
| 6916 |
+
"loss": 4.336456298828125,
|
| 6917 |
+
"step": 9640
|
| 6918 |
+
},
|
| 6919 |
+
{
|
| 6920 |
+
"epoch": 1.6303429633384017,
|
| 6921 |
+
"grad_norm": 0.4624863564968109,
|
| 6922 |
+
"learning_rate": 3.5178904953176354e-05,
|
| 6923 |
+
"loss": 4.305691146850586,
|
| 6924 |
+
"step": 9650
|
| 6925 |
+
},
|
| 6926 |
+
{
|
| 6927 |
+
"epoch": 1.6320324379118094,
|
| 6928 |
+
"grad_norm": 0.4653433859348297,
|
| 6929 |
+
"learning_rate": 3.487127113372755e-05,
|
| 6930 |
+
"loss": 4.32598648071289,
|
| 6931 |
+
"step": 9660
|
| 6932 |
+
},
|
| 6933 |
+
{
|
| 6934 |
+
"epoch": 1.6337219124852171,
|
| 6935 |
+
"grad_norm": 0.4744962453842163,
|
| 6936 |
+
"learning_rate": 3.4564811317935235e-05,
|
| 6937 |
+
"loss": 4.303342819213867,
|
| 6938 |
+
"step": 9670
|
| 6939 |
+
},
|
| 6940 |
+
{
|
| 6941 |
+
"epoch": 1.6354113870586247,
|
| 6942 |
+
"grad_norm": 0.4726518392562866,
|
| 6943 |
+
"learning_rate": 3.4259528630865995e-05,
|
| 6944 |
+
"loss": 4.328373718261719,
|
| 6945 |
+
"step": 9680
|
| 6946 |
+
},
|
| 6947 |
+
{
|
| 6948 |
+
"epoch": 1.6371008616320324,
|
| 6949 |
+
"grad_norm": 0.4716176390647888,
|
| 6950 |
+
"learning_rate": 3.3955426185582826e-05,
|
| 6951 |
+
"loss": 4.309525680541992,
|
| 6952 |
+
"step": 9690
|
| 6953 |
+
},
|
| 6954 |
+
{
|
| 6955 |
+
"epoch": 1.6387903362054401,
|
| 6956 |
+
"grad_norm": 0.4661267399787903,
|
| 6957 |
+
"learning_rate": 3.365250708311352e-05,
|
| 6958 |
+
"loss": 4.324785232543945,
|
| 6959 |
+
"step": 9700
|
| 6960 |
+
},
|
| 6961 |
+
{
|
| 6962 |
+
"epoch": 1.6404798107788476,
|
| 6963 |
+
"grad_norm": 0.46032196283340454,
|
| 6964 |
+
"learning_rate": 3.335077441241895e-05,
|
| 6965 |
+
"loss": 4.306519317626953,
|
| 6966 |
+
"step": 9710
|
| 6967 |
+
},
|
| 6968 |
+
{
|
| 6969 |
+
"epoch": 1.6421692853522556,
|
| 6970 |
+
"grad_norm": 0.5246592164039612,
|
| 6971 |
+
"learning_rate": 3.305023125036148e-05,
|
| 6972 |
+
"loss": 4.312277221679688,
|
| 6973 |
+
"step": 9720
|
| 6974 |
+
},
|
| 6975 |
+
{
|
| 6976 |
+
"epoch": 1.643858759925663,
|
| 6977 |
+
"grad_norm": 0.46025800704956055,
|
| 6978 |
+
"learning_rate": 3.275088066167369e-05,
|
| 6979 |
+
"loss": 4.307319259643554,
|
| 6980 |
+
"step": 9730
|
| 6981 |
+
},
|
| 6982 |
+
{
|
| 6983 |
+
"epoch": 1.6455482344990708,
|
| 6984 |
+
"grad_norm": 0.47664591670036316,
|
| 6985 |
+
"learning_rate": 3.245272569892727e-05,
|
| 6986 |
+
"loss": 4.350948333740234,
|
| 6987 |
+
"step": 9740
|
| 6988 |
+
},
|
| 6989 |
+
{
|
| 6990 |
+
"epoch": 1.6472377090724786,
|
| 6991 |
+
"grad_norm": 0.46211037039756775,
|
| 6992 |
+
"learning_rate": 3.215576940250155e-05,
|
| 6993 |
+
"loss": 4.310560607910157,
|
| 6994 |
+
"step": 9750
|
| 6995 |
+
},
|
| 6996 |
+
{
|
| 6997 |
+
"epoch": 1.648927183645886,
|
| 6998 |
+
"grad_norm": 0.4830545485019684,
|
| 6999 |
+
"learning_rate": 3.1860014800552734e-05,
|
| 7000 |
+
"loss": 4.30987777709961,
|
| 7001 |
+
"step": 9760
|
| 7002 |
+
},
|
| 7003 |
+
{
|
| 7004 |
+
"epoch": 1.6506166582192938,
|
| 7005 |
+
"grad_norm": 0.4861840605735779,
|
| 7006 |
+
"learning_rate": 3.15654649089831e-05,
|
| 7007 |
+
"loss": 4.3120475769042965,
|
| 7008 |
+
"step": 9770
|
| 7009 |
+
},
|
| 7010 |
+
{
|
| 7011 |
+
"epoch": 1.6523061327927016,
|
| 7012 |
+
"grad_norm": 0.5054605603218079,
|
| 7013 |
+
"learning_rate": 3.1272122731409916e-05,
|
| 7014 |
+
"loss": 4.325033569335938,
|
| 7015 |
+
"step": 9780
|
| 7016 |
+
},
|
| 7017 |
+
{
|
| 7018 |
+
"epoch": 1.653995607366109,
|
| 7019 |
+
"grad_norm": 0.46032124757766724,
|
| 7020 |
+
"learning_rate": 3.097999125913518e-05,
|
| 7021 |
+
"loss": 4.310620880126953,
|
| 7022 |
+
"step": 9790
|
| 7023 |
+
},
|
| 7024 |
+
{
|
| 7025 |
+
"epoch": 1.6556850819395168,
|
| 7026 |
+
"grad_norm": 0.4689234495162964,
|
| 7027 |
+
"learning_rate": 3.068907347111485e-05,
|
| 7028 |
+
"loss": 4.30926513671875,
|
| 7029 |
+
"step": 9800
|
| 7030 |
+
},
|
| 7031 |
+
{
|
| 7032 |
+
"epoch": 1.6573745565129245,
|
| 7033 |
+
"grad_norm": 0.47660669684410095,
|
| 7034 |
+
"learning_rate": 3.0399372333928644e-05,
|
| 7035 |
+
"loss": 4.313259887695312,
|
| 7036 |
+
"step": 9810
|
| 7037 |
+
},
|
| 7038 |
+
{
|
| 7039 |
+
"epoch": 1.659064031086332,
|
| 7040 |
+
"grad_norm": 0.48029860854148865,
|
| 7041 |
+
"learning_rate": 3.0110890801749627e-05,
|
| 7042 |
+
"loss": 4.307758331298828,
|
| 7043 |
+
"step": 9820
|
| 7044 |
+
},
|
| 7045 |
+
{
|
| 7046 |
+
"epoch": 1.6607535056597398,
|
| 7047 |
+
"grad_norm": 0.46481746435165405,
|
| 7048 |
+
"learning_rate": 2.982363181631418e-05,
|
| 7049 |
+
"loss": 4.303005981445312,
|
| 7050 |
+
"step": 9830
|
| 7051 |
+
},
|
| 7052 |
+
{
|
| 7053 |
+
"epoch": 1.6624429802331475,
|
| 7054 |
+
"grad_norm": 0.4820667505264282,
|
| 7055 |
+
"learning_rate": 2.9537598306892103e-05,
|
| 7056 |
+
"loss": 4.307665634155273,
|
| 7057 |
+
"step": 9840
|
| 7058 |
+
},
|
| 7059 |
+
{
|
| 7060 |
+
"epoch": 1.664132454806555,
|
| 7061 |
+
"grad_norm": 0.4749463200569153,
|
| 7062 |
+
"learning_rate": 2.9252793190256447e-05,
|
| 7063 |
+
"loss": 4.284444427490234,
|
| 7064 |
+
"step": 9850
|
| 7065 |
+
},
|
| 7066 |
+
{
|
| 7067 |
+
"epoch": 1.665821929379963,
|
| 7068 |
+
"grad_norm": 0.46186140179634094,
|
| 7069 |
+
"learning_rate": 2.896921937065419e-05,
|
| 7070 |
+
"loss": 4.313379287719727,
|
| 7071 |
+
"step": 9860
|
| 7072 |
+
},
|
| 7073 |
+
{
|
| 7074 |
+
"epoch": 1.6675114039533705,
|
| 7075 |
+
"grad_norm": 0.45953449606895447,
|
| 7076 |
+
"learning_rate": 2.8686879739776137e-05,
|
| 7077 |
+
"loss": 4.316988754272461,
|
| 7078 |
+
"step": 9870
|
| 7079 |
+
},
|
| 7080 |
+
{
|
| 7081 |
+
"epoch": 1.669200878526778,
|
| 7082 |
+
"grad_norm": 0.4738609194755554,
|
| 7083 |
+
"learning_rate": 2.8405777176727924e-05,
|
| 7084 |
+
"loss": 4.317482376098633,
|
| 7085 |
+
"step": 9880
|
| 7086 |
+
},
|
| 7087 |
+
{
|
| 7088 |
+
"epoch": 1.670890353100186,
|
| 7089 |
+
"grad_norm": 0.46274814009666443,
|
| 7090 |
+
"learning_rate": 2.8125914548000243e-05,
|
| 7091 |
+
"loss": 4.294824600219727,
|
| 7092 |
+
"step": 9890
|
| 7093 |
+
},
|
| 7094 |
+
{
|
| 7095 |
+
"epoch": 1.6725798276735935,
|
| 7096 |
+
"grad_norm": 0.47534388303756714,
|
| 7097 |
+
"learning_rate": 2.7847294707439828e-05,
|
| 7098 |
+
"loss": 4.28771743774414,
|
| 7099 |
+
"step": 9900
|
| 7100 |
+
},
|
| 7101 |
+
{
|
| 7102 |
+
"epoch": 1.6742693022470012,
|
| 7103 |
+
"grad_norm": 0.4873548150062561,
|
| 7104 |
+
"learning_rate": 2.7569920496220398e-05,
|
| 7105 |
+
"loss": 4.304574584960937,
|
| 7106 |
+
"step": 9910
|
| 7107 |
+
},
|
| 7108 |
+
{
|
| 7109 |
+
"epoch": 1.675958776820409,
|
| 7110 |
+
"grad_norm": 0.46979424357414246,
|
| 7111 |
+
"learning_rate": 2.729379474281352e-05,
|
| 7112 |
+
"loss": 4.303669738769531,
|
| 7113 |
+
"step": 9920
|
| 7114 |
+
},
|
| 7115 |
+
{
|
| 7116 |
+
"epoch": 1.6776482513938165,
|
| 7117 |
+
"grad_norm": 0.5028051733970642,
|
| 7118 |
+
"learning_rate": 2.701892026295979e-05,
|
| 7119 |
+
"loss": 4.331151962280273,
|
| 7120 |
+
"step": 9930
|
| 7121 |
+
},
|
| 7122 |
+
{
|
| 7123 |
+
"epoch": 1.6793377259672242,
|
| 7124 |
+
"grad_norm": 0.46676331758499146,
|
| 7125 |
+
"learning_rate": 2.6745299859640318e-05,
|
| 7126 |
+
"loss": 4.332028198242187,
|
| 7127 |
+
"step": 9940
|
| 7128 |
+
},
|
| 7129 |
+
{
|
| 7130 |
+
"epoch": 1.681027200540632,
|
| 7131 |
+
"grad_norm": 0.4791058897972107,
|
| 7132 |
+
"learning_rate": 2.6472936323047972e-05,
|
| 7133 |
+
"loss": 4.310791778564453,
|
| 7134 |
+
"step": 9950
|
| 7135 |
+
},
|
| 7136 |
+
{
|
| 7137 |
+
"epoch": 1.6827166751140394,
|
| 7138 |
+
"grad_norm": 0.4680987298488617,
|
| 7139 |
+
"learning_rate": 2.6201832430558866e-05,
|
| 7140 |
+
"loss": 4.313525390625,
|
| 7141 |
+
"step": 9960
|
| 7142 |
+
},
|
| 7143 |
+
{
|
| 7144 |
+
"epoch": 1.6844061496874472,
|
| 7145 |
+
"grad_norm": 0.47215357422828674,
|
| 7146 |
+
"learning_rate": 2.5931990946704206e-05,
|
| 7147 |
+
"loss": 4.311981582641602,
|
| 7148 |
+
"step": 9970
|
| 7149 |
+
},
|
| 7150 |
+
{
|
| 7151 |
+
"epoch": 1.686095624260855,
|
| 7152 |
+
"grad_norm": 0.4663841128349304,
|
| 7153 |
+
"learning_rate": 2.5663414623141943e-05,
|
| 7154 |
+
"loss": 4.314894485473633,
|
| 7155 |
+
"step": 9980
|
| 7156 |
+
},
|
| 7157 |
+
{
|
| 7158 |
+
"epoch": 1.6877850988342624,
|
| 7159 |
+
"grad_norm": 0.4573664367198944,
|
| 7160 |
+
"learning_rate": 2.5396106198628947e-05,
|
| 7161 |
+
"loss": 4.316466903686523,
|
| 7162 |
+
"step": 9990
|
| 7163 |
+
},
|
| 7164 |
+
{
|
| 7165 |
+
"epoch": 1.6894745734076704,
|
| 7166 |
+
"grad_norm": 0.4708999693393707,
|
| 7167 |
+
"learning_rate": 2.5130068398992716e-05,
|
| 7168 |
+
"loss": 4.313570404052735,
|
| 7169 |
+
"step": 10000
|
| 7170 |
+
},
|
| 7171 |
+
{
|
| 7172 |
+
"epoch": 1.6894745734076704,
|
| 7173 |
+
"eval_loss": 4.262009620666504,
|
| 7174 |
+
"eval_runtime": 3.6505,
|
| 7175 |
+
"eval_samples_per_second": 273.932,
|
| 7176 |
+
"eval_steps_per_second": 5.753,
|
| 7177 |
+
"step": 10000
|
| 7178 |
}
|
| 7179 |
],
|
| 7180 |
"logging_steps": 10,
|
|
|
|
| 7194 |
"attributes": {}
|
| 7195 |
}
|
| 7196 |
},
|
| 7197 |
+
"total_flos": 3.344547305037496e+17,
|
| 7198 |
"train_batch_size": 48,
|
| 7199 |
"trial_name": null,
|
| 7200 |
"trial_params": null
|